In this chapter, we will make use of grammars and grammar-based testing to systematically generate program code – for instance, to test a compiler or an interpreter. Not very surprisingly, we use Python and the Python interpreter as our domain.
We chose Python not only because the rest of the book is based on Python, but most importantly because Python brings lots of built-in infrastructure we can leverage – especially its ast module for parsing program code into abstract syntax trees (ASTs) and for turning ASTs back into code. This allows us to use grammars that operate on ASTs rather than concrete syntax, greatly reducing complexity.
Prerequisites
# ignore
import sys
# ignore
if sys.version_info < (3, 10):
    print("This code requires Python 3.10 or later")
    sys.exit(0)
To produce code, it is fairly easy to write a grammar with concrete syntax. If we want to produce, say, arithmetic expressions, we can easily create a concrete grammar which does precisely that.
We use the Fuzzingbook format for grammars, in which grammars are represented as dictionaries from symbols to lists of expansion alternatives.
EXPR_GRAMMAR: Grammar = {
    "<start>":
        ["<expr>"],

    "<expr>":
        ["<term> + <expr>", "<term> - <expr>", "<term>"],

    "<term>":
        ["<factor> * <term>", "<factor> / <term>", "<factor>"],

    "<factor>":
        ["+<factor>",
         "-<factor>",
         "(<expr>)",
         "<integer>.<integer>",
         "<integer>"],

    "<integer>":
        ["<digit><integer>", "<digit>"],

    "<digit>":
        ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]
}
assert is_valid_grammar(EXPR_GRAMMAR)
We can use this grammar to produce syntactically valid arithmetic expressions. We use the ISLa solver as our generator, as it is the most powerful; but we could also use any of our other grammar fuzzers, such as GrammarFuzzer, at this point.
Here are some concrete inputs produced from the grammar:
expr_solver = ISLaSolver(EXPR_GRAMMAR)
for _ in range(10):
    print(expr_solver.solve())
4.3 + 512 / -(7 / 6 - 0 / 9 * 1 * 1) * +8.3 / 7 * 4 / 6
(4 / 7 + 1) / (4) / 9 / 8 + 4 / (3 + 6 - 7)
+--(--(-9) * (4 * 7 + (4) + 4) + --(+(3)) - 6 + 0 / 7 + 7)
(2 * 6 + 0 - 5) * 4 - +1 * (2 - 2) / 8 / 6
(+-(0 - (1) * 7 / 3)) / ((1 * 3 + 8) + 9 - +1 / --0) - 5 * (-+939.491)
+2.9 * 0 / 501.19814 / --+--(6.05002)
+-8.8 / (1) * -+1 + -8 + 9 - 3 / 8 * 6 + 4 * 3 * 5
(+(8 / 9 - 1 - 7)) + ---06.30 / +4.39
8786.82 - +01.170 / 9.2 - +(7) + 1 * 9 - 0
+-6 * 0 / 5 * (-(1.7 * +(-1 / +4.9 * 5 * 1 * 2) + -4.2 + (6 + -5) / (4 * 3 + 4)))
We could extend the grammar further to also produce assignments and other statements, and piece by piece cover the entire syntax of the programming language. However, this would be a not-so-great idea. Why?
The problem is that when testing compilers, you not only want to be able to produce code, but also to parse code, such that you can mutate and manipulate it at will. And this is where our "concrete" syntax will give us problems. While we can easily parse code (or expressions) that exactly adheres to the syntax...
expr_solver.check('2 + 2')
True
... a single extra space will already suffice to make it fail...
expr_solver.check('2 +  2')
Error parsing "2 +  2" starting with "<start>"
False
... as does the absence of spaces:
expr_solver.check('2+2')
Error parsing "2+2" starting with "<start>"
False
Indeed, spaces are optional in most programming languages. We could update our grammar such that it can handle optional spaces at all times, introducing a <space> nonterminal (a sketch of this follows after the next examples). But then, there are other features like comments...
expr_solver.check('2 + 2 # should be 4')
Error parsing "2 + 2 # should be 4" starting with "<start>"
False
... or continuation lines ...
expr_solver.check('2 + \\\n2') # An expression split over two lines
Error parsing "2 + \ 2" starting with "<start>"
False
that our grammar would have to cover.
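Coming back to the optional spaces mentioned above: here is a sketch of what such a grammar extension could look like. This snippet is illustrative only – EXPR_GRAMMAR_WITH_SPACES is a hypothetical name, it assumes the fuzzingbook extend_grammar() helper (used later in this chapter), and one would have to weave <space> into the <term> and <factor> rules as well.
# Sketch only: make spaces around operators optional.
# The same treatment would be needed for <term>, <factor>, etc.
EXPR_GRAMMAR_WITH_SPACES: Grammar = extend_grammar(EXPR_GRAMMAR, {
    "<expr>":
        ["<term><space>+<space><expr>", "<term><space>-<space><expr>", "<term>"],
    "<space>":
        ["", " <space>"],  # zero or more spaces
})
assert is_valid_grammar(EXPR_GRAMMAR_WITH_SPACES)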
On top, there are language features that cannot even be represented properly in a context-free grammar – Python's indentation-based block structure, for instance, is context-sensitive.
For this reason, it is often a good idea to make use of a dedicated parser (or preprocessor) to turn input into a more abstract representation - typically a tree structure. In programming languages, such a tree is called an abstract syntax tree (AST); it is the data structure that compilers operate on.
Abstract Syntax Trees (ASTs) that represent program code are among the most complex data structures in the world (if not the most complex data structures) - notably because they reflect all the complexity of the programming language and its features. The good news is that in Python, working with ASTs is particularly easy - one can work with them using standard language features.
Let us illustrate ASTs using an example. Here is a piece of code that we'd like to work with:
def main():
    print("Hello, world!") # A simple example

main()
Hello, world!
Let us obtain the source code of this function:
main_source = inspect.getsource(main)
print(main_source)
def main():
    print("Hello, world!") # A simple example
We make use of the Python AST module to convert this code string to an AST and back. With ast.parse(), we can parse the main() source into an AST:
main_tree = ast.parse(main_source)
This is what this tree looks like:
show_ast(main_tree)
We see how the function definition has become a FunctionDef node, whose third child is an Expr node, which in turn becomes a Call – of the "print" function with an argument of "Hello, world!".
Each of these AST nodes comes as a constructor – that is, we can invoke FunctionDef() to obtain a function definition node, or Call() to obtain a call node. These constructors take the AST children as arguments, but also lots of optional arguments (which we did not use so far). The dump of the AST into a string reveals all the arguments for each constructor:
print(ast.dump(main_tree, indent=4))
Module(
    body=[
        FunctionDef(
            name='main',
            args=arguments(
                posonlyargs=[],
                args=[],
                kwonlyargs=[],
                kw_defaults=[],
                defaults=[]),
            body=[
                Expr(
                    value=Call(
                        func=Name(id='print', ctx=Load()),
                        args=[
                            Constant(value='Hello, world!')],
                        keywords=[]))],
            decorator_list=[])],
    type_ignores=[])
The Python ast documentation lists all these constructors, which make up the abstract syntax. There are more than 100 individual constructors! (We said that ASTs are complex, right?)
The nice thing about the above string representation is that we can take it as is and turn it into a tree again:
my_main_tree = Module(
    body=[
        FunctionDef(
            name='main',
            args=arguments(
                posonlyargs=[],
                args=[],
                kwonlyargs=[],
                kw_defaults=[],
                defaults=[]),
            body=[
                Expr(
                    value=Call(
                        func=Name(id='print', ctx=Load()),
                        args=[
                            Constant(value='Hello, world!')],
                        keywords=[]))],
            decorator_list=[])],
    type_ignores=[])
We can take this tree and compile it into executable code:
my_main_tree = fix_missing_locations(my_main_tree) # required for trees built from constructors
my_main_code = compile(my_main_tree, filename='<unknown>', mode='exec')
del main # This deletes the definition of main()
exec(my_main_code) # This defines main() again from `code`
main()
Hello, world!
We can also unparse the tree (= turn it into source code again). (Note how the comment got lost during parsing.)
print(ast.unparse(my_main_tree))
def main():
    print('Hello, world!')
Hence, we can
1. parse code into an AST (using ast.parse());
2. generate and mutate ASTs; and
3. unparse ASTs into code again (using ast.unparse()).
To generate and mutate ASTs (step #2, above), we need means to produce correct ASTs, invoking all constructors with the correct arguments. The plan is thus to have a grammar for ASTs, which produces (and parses) ASTs as we like.
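As a quick sanity check combining steps 1 and 3, here is a minimal round trip – a sketch that assumes the ast node constructors (Module, Expr, Call, ...) are in scope, as in the examples above:
# Round trip: code -> AST -> constructor string -> AST -> code
round_trip_tree = ast.parse("print('Hello, world!')")
round_trip_str = ast.dump(round_trip_tree)   # a string of constructor calls
rebuilt_tree = eval(round_trip_str)          # re-create the AST from that string
rebuilt_tree = ast.fix_missing_locations(rebuilt_tree)
assert ast.unparse(rebuilt_tree) == ast.unparse(round_trip_tree)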
Programming language grammars are among the most complicated formal grammars around, and ASTs reflect much of this complexity. We will use the abstract AST grammar as specified in the Python documentation as a base, and build a formal context-free grammar step by step.
We will start with simple constants – strings and integers. Again, we use the fuzzingbook syntax for grammars, as it allows for easier extension.
ANYTHING_BUT_DOUBLE_QUOTES_AND_BACKSLASH = (string.digits + string.ascii_letters + string.punctuation + ' ').replace('"', '').replace('\\', '')
ANYTHING_BUT_SINGLE_QUOTES_AND_BACKSLASH = (string.digits + string.ascii_letters + string.punctuation + ' ').replace("'", '').replace('\\', '')
ANYTHING_BUT_DOUBLE_QUOTES_AND_BACKSLASH
"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!#$%&'()*+,-./:;<=>?@[]^_`{|}~ "
ANYTHING_BUT_SINGLE_QUOTES_AND_BACKSLASH
'0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&()*+,-./:;<=>?@[]^_`{|}~ '
PYTHON_AST_CONSTANTS_GRAMMAR: Grammar = {
    '<start>': [ '<expr>' ],

    # Expressions
    '<expr>': [ '<Constant>', '<Expr>' ],
    '<Expr>': [ 'Expr(value=<expr>)' ],

    # Constants
    '<Constant>': [ 'Constant(value=<literal>)' ],
    '<literal>': [ '<string>', '<integer>', '<float>', '<bool>', '<none>' ],

    # Strings
    '<string>': [ '"<not_double_quotes>*"', "'<not_single_quotes>*'" ],
    '<not_double_quotes>': list(ANYTHING_BUT_DOUBLE_QUOTES_AND_BACKSLASH),
    '<not_single_quotes>': list(ANYTHING_BUT_SINGLE_QUOTES_AND_BACKSLASH),
    # FIXME: The actual rules for Python strings are also more complex:
    # https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals

    # Numbers
    '<integer>': [ '<digit>', '<nonzerodigit><digits>' ],
    '<float>': [ '<integer>.<integer>' ],
    '<nonzerodigit>': ['1', '2', '3', '4', '5', '6', '7', '8', '9'],
    '<digits>': [ '<digit><digits>', '<digit>' ],
    '<digit>': ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'],
    # FIXME: There are _many_ more ways to express numbers in Python; see
    # https://docs.python.org/3/reference/lexical_analysis.html#numeric-literals

    # More
    '<bool>': [ 'True', 'False' ],
    '<none>': [ 'None' ],
    # FIXME: Not supported: bytes, format strings, regex strings...
}
Note that we use extended Backus-Naur form in our grammars (here: in <string>):
- <elem>+ stands for one or more instances of <elem>;
- <elem>* stands for zero or more instances of <elem>;
- <elem>? stands for one or zero instances of <elem>.
A call to is_valid_grammar() ensures our grammar is free of common mistakes. Don't write grammars without it!
assert is_valid_grammar(PYTHON_AST_CONSTANTS_GRAMMAR)
constants_grammar = convert_ebnf_grammar(PYTHON_AST_CONSTANTS_GRAMMAR)
constants_solver = ISLaSolver(constants_grammar)
constants_tree_str = str(constants_solver.solve())
print(constants_tree_str)
Expr(value=Constant(value=None))
We can create an AST from this expression and turn it into Python code (well, a literal):
constants_tree = eval(constants_tree_str)
ast.unparse(constants_tree)
'None'
Let's do this a number of times:
def test_samples(grammar: Grammar, iterations: int = 10, start_symbol=None, log: bool = True):
    g = convert_ebnf_grammar(grammar)
    solver = ISLaSolver(g, start_symbol=start_symbol, max_number_free_instantiations=iterations)
    for i in range(iterations):
        tree_str = str(solver.solve())
        tree = eval(tree_str)
        ast.fix_missing_locations(tree)
        if log:
            code = ast.unparse(tree)
            print(f'{code:40} # {tree_str}')
test_samples(PYTHON_AST_CONSTANTS_GRAMMAR)
False                                    # Expr(value=Constant(value=False))
2                                        # Constant(value=2)
None                                     # Constant(value=None)
'#'                                      # Constant(value="#")
550.81                                   # Constant(value=550.81)
True                                     # Constant(value=True)
'.'                                      # Constant(value='.')
467                                      # Constant(value=467)
7894                                     # Constant(value=7894)
263                                      # Constant(value=263)
Our grammar can also parse ASTs obtained from concrete code.
sample_constant_code = "4711"
sample_constant_ast = ast.parse(sample_constant_code).body[0] # get the `Expr` node
sample_constant_ast_str = ast.dump(sample_constant_ast)
print(sample_constant_ast_str)
Expr(value=Constant(value=4711))
constant_solver = ISLaSolver(constants_grammar)
constant_solver.check(sample_constant_ast_str)
True
Let us now come up with a quiz question: does our grammar support negative numbers? For this, let's first find out whether the Constant() constructor can also take a negative number as an argument. It turns out it can:
ast.unparse(Constant(value=-1))
'-1'
But what happens if we parse a negative number, say -1? One might assume that this simply results in a Constant(-1), right? Try it out yourself!
quiz("If we parse a negative number, do we obtain ",
[
"a `Constant()` with a negative value, or",
"a unary `-` operator applied to a positive value?"
], 1 ** 0 + 1 ** 1)
The answer is that parsing -1 yields a unary minus USub() applied to a positive value:
print(ast.dump(ast.parse('-1')))
Module(body=[Expr(value=UnaryOp(op=USub(), operand=Constant(value=1)))], type_ignores=[])
As unary operators are not part of our grammar (yet), it cannot handle negative numbers:
sample_constant_code = "-1"
sample_constant_ast = ast.parse(sample_constant_code).body[0] # get the `Expr` node
sample_constant_ast_str = ast.dump(sample_constant_ast)
constant_solver = ISLaSolver(constants_grammar)
constant_solver.check(sample_constant_ast_str)
Error parsing "Expr(value=UnaryOp(op=USub(), operand=Constant(value=1)))" starting with "<start>"
False
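To give an idea of what such an extension could look like, here is a sketch that adds unary operators to the constants grammar. The name PYTHON_AST_UNARYOP_GRAMMAR is hypothetical; the full grammar developed below handles unary operators analogously.
# Sketch: add unary operators as an expression alternative
PYTHON_AST_UNARYOP_GRAMMAR: Grammar = extend_grammar(PYTHON_AST_CONSTANTS_GRAMMAR, {
    '<expr>': [ '<Constant>', '<Expr>', '<UnaryOp>' ],
    '<UnaryOp>': [ 'UnaryOp(op=<unaryop>, operand=<expr>)' ],
    '<unaryop>': [ 'UAdd()', 'USub()', 'Not()', 'Invert()' ],
})
assert is_valid_grammar(PYTHON_AST_UNARYOP_GRAMMAR)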
In the next sections, we will gradually expand our grammar with more and more Python features, eventually covering (almost) the entire language.
At this point, we have covered (almost) all AST elements of Python. There would be a few more Python elements to consider (marked as FIXME, above), but we'll leave these to the reader. Let us define PYTHON_AST_GRAMMAR as the official grammar coming out of this chapter.
PYTHON_AST_GRAMMAR = PYTHON_AST_MODULE_GRAMMAR
python_ast_grammar = convert_ebnf_grammar(PYTHON_AST_GRAMMAR)
Here are a few (very weird) examples of Python functions we can produce. All of these are valid, but only syntactically – very few of the code samples produced this way will actually result in something meaningful.
for elt in [ '<FunctionDef>' ]:
    print(elt)
    test_samples(PYTHON_AST_GRAMMAR, start_symbol=elt)
    print()
<FunctionDef> def w(): pass # FunctionDef(name='w', args=arguments(posonlyargs=[], args=[], kwonlyargs=[], kw_defaults=[], defaults=[]), body=[Pass()], decorator_list=[]) def a(): break # FunctionDef(name='a', args=arguments(posonlyargs=[], args=[], kwonlyargs=[], kw_defaults=[], defaults=[]), body=[Break()], decorator_list=[]) def o(): return # FunctionDef(name='o', args=arguments(posonlyargs=[], args=[], kwonlyargs=[], kw_defaults=[], defaults=[]), body=[Return()], decorator_list=[]) def v(): # type: continue # FunctionDef(name='v', args=arguments(posonlyargs=[], args=[], kwonlyargs=[], kw_defaults=[], defaults=[]), body=[Continue()], decorator_list=[], type_comment='') def j(): # type: return # FunctionDef(name='j', args=arguments(posonlyargs=[], args=[], kwonlyargs=[], kw_defaults=[], defaults=[]), body=[Return()], decorator_list=[], type_comment="") def k(): return return # FunctionDef(name='k', args=arguments(posonlyargs=[], args=[], kwonlyargs=[], kw_defaults=[], defaults=[]), body=[Return(), Return()], decorator_list=[]) def Q() -> set(): # type: return # FunctionDef(name='Q', args=arguments(posonlyargs=[], args=[], kwonlyargs=[], kw_defaults=[], defaults=[]), body=[Return()], decorator_list=[], returns=Call(func=Name(id="set", ctx=Load()), args=[], keywords=[]), type_comment='') def d() -> None: return assert set(), set() return # FunctionDef(name='d', args=arguments(posonlyargs=[], args=[], kwonlyargs=[], kw_defaults=[], defaults=[]), body=[Return(), Assert(test=Call(func=Name(id="set", ctx=Load()), args=[], keywords=[]), msg=Call(func=Name(id="set", ctx=Load()), args=[], keywords=[])), Return()], decorator_list=[], returns=Constant(value=None)) def K() -> set(): return # FunctionDef(name='K', args=arguments(posonlyargs=[], args=[], kwonlyargs=[], kw_defaults=[], defaults=[]), body=[Return()], decorator_list=[], returns=Call(func=Name(id="set", ctx=Load()), args=[], keywords=[])) def y(): # type: return # FunctionDef(name='y', args=arguments(posonlyargs=[], args=[], kwonlyargs=[], kw_defaults=[], defaults=[]), body=[Return()], decorator_list=[], type_comment='')
For convenience, let us introduce a class PythonFuzzer that makes use of the above grammar in order to produce Python code. This will be fairly easy to use.
class PythonFuzzer(ISLaSolver):
    """Produce Python code."""

    def __init__(self,
                 start_symbol: Optional[str] = None, *,
                 grammar: Optional[Grammar] = None,
                 constraint: Optional[str] = None,
                 **kw_params) -> None:
        """Produce Python code. Parameters are:

        * `start_symbol`: the grammatical entity to be generated (default: `<FunctionDef>`)
        * `grammar`: the EBNF grammar to be used (default: `PYTHON_AST_GRAMMAR`); and
        * `constraint`: an ISLa constraint (if any).

        Additional keyword parameters are passed to the `ISLaSolver` superclass.
        """
        if start_symbol is None:
            start_symbol = '<FunctionDef>'
        if grammar is None:
            grammar = PYTHON_AST_GRAMMAR
        assert start_symbol in grammar

        g = convert_ebnf_grammar(grammar)
        if constraint is None:
            super().__init__(g, start_symbol=start_symbol, **kw_params)
        else:
            super().__init__(g, constraint, start_symbol=start_symbol, **kw_params)

    def fuzz(self) -> str:
        """Produce a Python code string."""
        abstract_syntax_tree = eval(str(self.solve()))
        ast.fix_missing_locations(abstract_syntax_tree)
        return ast.unparse(abstract_syntax_tree)
By default, the PythonFuzzer will produce a function definition – that is, a function header and body.
fuzzer = PythonFuzzer()
print(fuzzer.fuzz())
def L():
    continue
By passing a start symbol as a parameter, you can have PythonFuzzer produce arbitrary Python elements:
fuzzer = PythonFuzzer('<While>')
print(fuzzer.fuzz())
while (set()[set():set()], *(set())):
    if {}:
        while set():
            continue
        break
else:
    del
    return
Here is a list of all possible start symbols:
sorted(list(PYTHON_AST_GRAMMAR.keys()))
['<Assert>', '<Assign>', '<Attribute>', '<AugAssign>', '<BinOp>', '<BoolOp>', '<Break>', '<Call>', '<Compare>', '<Constant>', '<Continue>', '<Delete>', '<Dict>', '<EmptySet>', '<Expr>', '<For>', '<FunctionDef>', '<If>', '<List>', '<Module>', '<Name>', '<Pass>', '<Return>', '<Set>', '<Slice>', '<Starred>', '<Subscript>', '<Tuple>', '<UnaryOp>', '<While>', '<With>', '<arg>', '<arg_list>', '<args>', '<args_param>', '<arguments>', '<bool>', '<boolop>', '<cmpop>', '<cmpop_list>', '<cmpops>', '<decorator_list_param>', '<defaults_param>', '<digit>', '<digits>', '<expr>', '<expr_list>', '<exprs>', '<float>', '<func>', '<id>', '<id_continue>', '<id_start>', '<identifier>', '<integer>', '<keyword>', '<keyword_list>', '<keywords>', '<keywords_param>', '<kw_defaults_param>', '<kwarg>', '<kwonlyargs_param>', '<lhs_Attribute>', '<lhs_List>', '<lhs_Name>', '<lhs_Starred>', '<lhs_Subscript>', '<lhs_Tuple>', '<lhs_expr>', '<lhs_exprs>', '<literal>', '<mod>', '<none>', '<nonempty_expr_list>', '<nonempty_lhs_expr_list>', '<nonempty_stmt_list>', '<nonzerodigit>', '<not_double_quotes>', '<not_single_quotes>', '<operator>', '<orelse_param>', '<posonlyargs_param>', '<returns>', '<start>', '<stmt>', '<stmt_list>', '<stmts>', '<string>', '<type_comment>', '<type_ignore>', '<type_ignore_list>', '<type_ignore_param>', '<type_ignores>', '<unaryop>', '<vararg>', '<withitem>', '<withitem_list>', '<withitems>']
When fuzzing, you may be interested in specific properties of the produced output. How can we influence the code that PythonFuzzer produces? We explore two ways:
A simple way to adjust output generation is to adapt the grammar.
Let us assume you'd like to have function definitions without decorators. To achieve this, you can alter the rule that produces function definitions:
PYTHON_AST_GRAMMAR['<FunctionDef>']
['FunctionDef(name=<identifier>, args=<arguments>, body=<nonempty_stmt_list><decorator_list_param><returns>?<type_comment>?)']
Like any AST rule, it comes in abstract syntax, so we first have to identify the element we'd like to adjust. In our case, this is decorator_list. Since decorator_list is a list, we can alter the rule to produce empty lists only.
To create a new adapted grammar, we do not alter the existing PYTHON_AST_GRAMMAR. Instead, we use the extend_grammar() function to create a new grammar with a new, adapted rule for <FunctionDef>:
python_ast_grammar_without_decorators: Grammar = extend_grammar(PYTHON_AST_GRAMMAR,
    {
        '<FunctionDef>':
            ['FunctionDef(name=<identifier>, args=<arguments>, body=<nonempty_stmt_list>, decorator_list=[])']
    })
However, we're not done yet. We also need to ensure that our grammar is valid, as any misspelled nonterminal identifier will result in problems during production. For this, we use the is_valid_grammar() function:
with ExpectError():
    assert is_valid_grammar(python_ast_grammar_without_decorators)
'<decorator_list_param>': defined, but not used. Consider applying trim_grammar() on the grammar '<returns>': defined, but not used. Consider applying trim_grammar() on the grammar '<decorator_list_param>': unreachable from <start>. Consider applying trim_grammar() on the grammar '<returns>': unreachable from <start>. Consider applying trim_grammar() on the grammar Traceback (most recent call last): File "/var/folders/n2/xd9445p97rb3xh7m1dfx8_4h0006ts/T/ipykernel_32402/3611578183.py", line 2, in <cell line: 1> assert is_valid_grammar(python_ast_grammar_without_decorators) AssertionError (expected)
We see that with our change, our grammar now has orphaned rules: the <decorator_list_param> and <returns> rules are no longer used, as they occurred only in the <FunctionDef> rule we just replaced. To fix this, we need to delete these orphaned rules from our grammar. Fortunately, we have a function trim_grammar(), which deletes all orphaned rules:
python_ast_grammar_without_decorators = trim_grammar(python_ast_grammar_without_decorators)
With this, our grammar becomes valid...
assert is_valid_grammar(python_ast_grammar_without_decorators)
... and we can use it for fuzzing - now without decorators:
fuzzer = PythonFuzzer(grammar=python_ast_grammar_without_decorators)
print(fuzzer.fuzz())
def X():
    break
Adjusting the grammar is straightforward once you understand the grammar structure; but the AST grammar is complex, and your changes and extensions tie you closely to that structure. Carefully study how the individual rules are defined, above.
A more elegant alternative to altering the grammar is to make use of constraints that tune the grammar to your needs.
Since PythonFuzzer is derived from ISLaSolver, we can pass a constraint argument constraining the grammar, as discussed in the chapter on fuzzing with constraints.
If we want to have a function definition with 10 characters in each identifier, we make use of an ISLa constraint:
fuzzer = PythonFuzzer(constraint='str.len(<id>) = 10')
print(fuzzer.fuzz())
def yWOOLwypwp(): # type:
    return
We can also constrain individual children – say, the actual identifier of the function.
# Also works (the <identifier> has quotes)
fuzzer = PythonFuzzer(constraint='<FunctionDef>.<identifier> = "\'my_favorite_function\'"')
print(fuzzer.fuzz())
@[set(), set()]
@set() | {}
@(-*set())[set():(): set()[:]()]
def my_favorite_function(dlFf=Qr, l1M=set(), *) -> 942.5:
    return
Assume we want to test how the compiler handles large numbers. Let us define a constraint such that the function body (<nonempty_stmt_list>) contains at least one integer (<integer>) with a value greater than 1000:
fuzzer = PythonFuzzer(constraint=
    """
    exists <integer> x:
        (inside(x, <nonempty_stmt_list>) and str.to.int(x) > 1000)
    """)
print(fuzzer.fuzz())
@[set(), +set(), set()]
@{set(): set(), set(): set()}
@(set(), *set() & set())
def l(r, a, /, *uXLV, _=set()[:], **Z) -> sdTYWE9b or {set(), set().R}.Vy != z1vw([]):
    del 1008
Assume we'd like to test compilers with non-trivial functions. Here's how to define a constraint such that the function body has exactly three statements (<stmt>). Note that this can take more than a minute to resolve, but the result definitely is a non-trivial function.
# This will not work with ISLa 2
fuzzer = PythonFuzzer(constraint="""
    forall <FunctionDef> def: count(def, "<stmt>", "3")
""")
print(fuzzer.fuzz())
@3.91
def V8(w, /, *, t=set(), C5D=set(), **foT6):
    if *{}.S[:] - ((set()) not in set() in set()):
        break
    else:
        return
And finally, if we want the decorator list to be empty, as in our grammar-altering example, we can constrain the decorator list to be empty:
# ignore
# with ExpectError(mute=True):
#     # Triggers an ISLa error (AssertionError)
#     fuzzer = PythonFuzzer(constraint='''
#         str.contains(<FunctionDef>, "decorator_list=[]")
#     ''')
#     print(fuzzer.fuzz())
# ignore
# with ExpectError(mute=True):
#     # Triggers an ISLa error (AssertionError)
#     fuzzer = PythonFuzzer(constraint='<FunctionDef>.<expr_list> = "[]"')
#     print(fuzzer.fuzz())
fuzzer = PythonFuzzer(constraint='<FunctionDef>..<expr_list> = "[]"')
print(fuzzer.fuzz())
def l(Jws4IzSPx_O2ajk687obQB3mflULCTJWnAv9GHg0YRtVNycueKFDMihZ5rXd1pqEo, /, *, **g):
    return
When producing code for compilers (or actually, producing inputs in general), it is often a good idea to not just create everything from scratch, but rather to mutate existing inputs. This way, one can achieve a better balance between common inputs (the ones to mutate) and uncommon inputs (the new parts added via mutation).
To mutate inputs, we first need to be able to parse them. This is where a grammar is really put to the test: can it really parse all possible code? This is why relying on an existing parser that is tried and proven (in our case the Python parser) and operating on an abstraction (in our case the AST) is really handy.
We already have seen how to parse code into an AST, using ast.parse():
def sum(a, b): # A simple example
    the_sum = a + b
    return the_sum
sum_source = inspect.getsource(sum)
sum_tree = ast.parse(sum_source)
print(ast.unparse(sum_tree))
def sum(a, b):
    the_sum = a + b
    return the_sum
sum_str = ast.dump(sum_tree)
sum_str
"Module(body=[FunctionDef(name='sum', args=arguments(posonlyargs=[], args=[arg(arg='a'), arg(arg='b')], kwonlyargs=[], kw_defaults=[], defaults=[]), body=[Assign(targets=[Name(id='the_sum', ctx=Store())], value=BinOp(left=Name(id='a', ctx=Load()), op=Add(), right=Name(id='b', ctx=Load()))), Return(value=Name(id='the_sum', ctx=Load()))], decorator_list=[])], type_ignores=[])"
Our grammar is able to parse this (non-trivial) string:
solver = ISLaSolver(python_ast_grammar)
assert solver.check(sum_str)
To mutate the input, we first have to parse it into a derivation tree structure. This is (again) a tree representation of the code, but this time, using the elements of our grammar.
sum_tree = solver.parse(sum_str)
Let us inspect what a derivation tree looks like. Alas, the string representation is very long and not that useful:
len(repr(sum_tree))
8737
repr(sum_tree)[:200]
"DerivationTree('<start>', (DerivationTree('<mod>', (DerivationTree('<Module>', (DerivationTree('Module(body=', (), id=495073), DerivationTree('<nonempty_stmt_list>', (DerivationTree('[', (), id=495071"
However, we can visualize the derivation tree:
display_tree(sum_tree)
We see that a derivation tree consists of nonterminal nodes whose children make up an expansion from the grammar. For instance, at the very top, we see that a <start> nonterminal expands into a <mod> nonterminal, which again expands into a <Module> nonterminal.
This comes right from the grammar rules
python_ast_grammar['<start>']
['<mod>']
and
python_ast_grammar['<mod>']
['<Module>']
The child of <mod>
is a <Module>
, which expands into the nodes
(body=
<nonempty_stmt_list>
, type_ignores=
<type_ignore_list>
)
Here, nodes like (body=
or , type_ignores=
are called terminal nodes (because they have no more elements to expand).
The nonterminals like <nonempty_stmt_list>
get expanded further below – notably, <nonempty_stmt_list>
expands into a <FunctionDef>
node that represents the sum()
definition.
Again, the structure exactly follows the <Module> definition in our grammar:
python_ast_grammar['<Module>']
['Module(body=<nonempty_stmt_list><type_ignore_param>)']
If we traverse the tree depth-first, left to right, and only collect the terminal symbols, we obtain the original string we parsed. Applying the str() function to the derivation tree gets us exactly that string:
str(sum_tree)
"Module(body=[FunctionDef(name='sum', args=arguments(posonlyargs=[], args=[arg(arg='a'), arg(arg='b')], kwonlyargs=[], kw_defaults=[], defaults=[]), body=[Assign(targets=[Name(id='the_sum', ctx=Store())], value=BinOp(left=Name(id='a', ctx=Load()), op=Add(), right=Name(id='b', ctx=Load()))), Return(value=Name(id='the_sum', ctx=Load()))], decorator_list=[])], type_ignores=[])"
And again, we can convert this string into an AST and thus obtain our original function:
sum_ast = ast.fix_missing_locations(eval(str(sum_tree)))
print(ast.unparse(sum_ast))
def sum(a, b):
    the_sum = a + b
    return the_sum
With derivation trees, we can have a structured representation of our input. In our case, we already have that with ASTs, so why bother introducing a new one? The answer is simple: Derivation trees also allow us to synthesize new inputs, because we have a grammar that describes their structure.
Most notably, we can mutate inputs as follows:
1. Choose a nonterminal <symbol> in the derivation tree to be mutated.
2. Use the grammar to produce a new random expansion for <symbol>.
3. Replace the subtree of <symbol> by the expansion just generated.
This is a decent programming task, and if you'd like a blueprint, have a look at the FragmentMutator in this tutorial on greybox fuzzing with grammars.
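For a rough idea of the three steps above, here is a sketch based on the fuzzingbook tuple representation of derivation trees and the GrammarFuzzer class; all function names are illustrative, and this is not how ISLa implements mutation internally.
import random
from GrammarFuzzer import GrammarFuzzer

def nonterminal_paths(tree, path=()):
    """Yield the path of every nonterminal node in a (symbol, children) tree."""
    symbol, children = tree
    if symbol.startswith('<'):
        yield path
    for index, child in enumerate(children or []):
        yield from nonterminal_paths(child, path + (index,))

def subtree_at(tree, path):
    """Return the subtree at the given path."""
    for index in path:
        tree = tree[1][index]
    return tree

def replace_at(tree, path, new_subtree):
    """Return a copy of `tree` with the subtree at `path` replaced."""
    if not path:
        return new_subtree
    symbol, children = tree
    children = list(children)
    children[path[0]] = replace_at(children[path[0]], path[1:], new_subtree)
    return (symbol, children)

def naive_mutate(tree, grammar):
    """Steps 1-3: pick a random <symbol> node and regenerate it from the grammar."""
    path = random.choice(list(nonterminal_paths(tree)))
    symbol = subtree_at(tree, path)[0]
    fuzzer = GrammarFuzzer(grammar, start_symbol=symbol)
    fuzzer.fuzz()  # fills fuzzer.derivation_tree
    return replace_at(tree, path, fuzzer.derivation_tree)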
Fortunately, ISLa already provides us with functionality that does exactly this. The ISLaSolver.mutate() method takes an input and mutates it according to the rules in the grammar. The input to mutate can be given as a derivation tree or as a string; its output is a derivation tree (which can again be converted into a string). Let us apply mutate() on our sum() function. The min_mutations and max_mutations parameters define how many mutation steps should be performed; we set both to 1 in order to have exactly one mutation.
sum_mutated_tree = solver.mutate(sum_str, min_mutations=1, max_mutations=1)
sum_mutated_ast = ast.fix_missing_locations(eval(str(sum_mutated_tree)))
print(ast.unparse(sum_mutated_ast))
def sum(a, b):
    the_sum = a + b
    return the_sum
Toy with the above to see the effect of a mutation. Note that if one of the top-level nodes (like <FunctionDef> or <Module>) is selected for mutation, then sum() will be replaced by something entirely different. Otherwise, though, the code will still be pretty similar to the original sum() code.
Of course, the more we increase the number of mutations, the more different the code will look:
sum_mutated_tree = solver.mutate(sum_str, min_mutations=10, max_mutations=20)
sum_mutated_ast = ast.fix_missing_locations(eval(str(sum_mutated_tree)))
print(ast.unparse(sum_mutated_ast))
def sum(a, b):
    the_9GuWCvL4cpgyi37K5I_ = a + b
    return the_jXHPe1oqMG
By toying with the mutate() parameters, we can control how common or how uncommon our input should be.
Does mutating existing code help us in finding bugs?
Let us assume we have a buggy compiler that generates bad code for an expression of the form <elem> * (<elem> + <elem>). The code in has_distributive_law() checks an AST for the presence of this bug:
def has_distributive_law(tree) -> bool:
    for node in walk(tree): # iterate over all nodes in `tree`
        # print(node)
        if isinstance(node, ast.BinOp):
            if isinstance(node.op, ast.Mult):
                if isinstance(node.right, ast.BinOp):
                    if isinstance(node.right.op, ast.Add):
                        return True

                if isinstance(node.left, ast.BinOp):
                    if isinstance(node.left.op, ast.Add):
                        return True

    return False
To understand how this works, a visualization of the AST comes in handy:
show_ast(ast.parse("1 + (2 * 3)"))
has_distributive_law(ast.parse("1 * (2 + 3)"))
True
has_distributive_law(ast.parse("(1 + 2) * 3"))
True
has_distributive_law(ast.parse("1 + (2 * 3)"))
False
has_distributive_law(ast.parse("def f(a, b):\n return a * (b + 10)"))
True
How many attempts does it take for each input until we find a mutation that triggers the bug in has_distributive_law()? Let us write a function that computes this number.
def how_many_mutations(code: str) -> int:
    solver = ISLaSolver(python_ast_grammar)
    code_ast = ast.parse(code)
    code_ast = ast.fix_missing_locations(code_ast)
    code_ast_str = ast.dump(code_ast)
    code_derivation_tree = solver.parse(code_ast_str)

    mutations = 0
    mutated_code_ast = code_ast
    while not has_distributive_law(mutated_code_ast):
        mutations += 1
        if mutations % 100 == 0:
            print(f'{mutations}...', end='')
        mutated_code_str = str(solver.mutate(code_derivation_tree))
        mutated_code_ast = eval(mutated_code_str)
        # mutated_code_ast = ast.fix_missing_locations(mutated_code_ast)
        # print(ast.dump(mutated_code_ast))
        # print(ast.unparse(mutated_code_ast))

    return mutations
If we pass an input that already exhibits the bug, we do not need any mutation:
assert how_many_mutations('1 * (2 + 3)') == 0
However, the further we are away from the bug, the more mutations (and the more time) it takes to find it. Notably, mutating 2 + 2 until we obtain an instance of the distributive law is still much faster than mutating 2.
how_many_mutations('2 + 2') # <-- Note: this can take a minute
54
how_many_mutations('2') # <-- Note: this can take several minutes
100...200...300...400...500...600...700...800...900...1000...1100...1200...1300...1400...1500...1600...1700...1800...1900...2000...2100...2200...2300...2400...2500...
2500
We conclude that mutating existing code can indeed be helpful, especially if it is syntactically close to inputs that trigger bugs. If you want to have a good chance of finding bugs, focus on inputs that have triggered bugs before – sometimes a simple mutation of these already helps find a new bug.
One interesting application of mutating inputs is to use mutations for evolutionary fuzzing. The idea is to have a population of inputs, to apply mutations on them, and to check whether they improve on a particular goal (mostly code coverage). Those inputs that do improve are being retained ("survival of the fittest") as the next generation, and evolved further. By repeating this process often enough, we may obtain inputs that cover large parts of code and thus improve chances to uncover bugs.
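As a high-level sketch, the overall loop looks as follows; all names here are illustrative, and the concrete implementation is developed step by step in the remainder of this section:
def evolutionary_fuzz(initial_input, fitness, mutate, goal_reached, size=100):
    """Generic evolutionary loop: mutate, evaluate, select the fittest."""
    population = [initial_input]
    while not any(goal_reached(member) for member in population):
        offspring = [mutate(member) for member in population]
        population = sorted(population + offspring, key=fitness, reverse=True)[:size]
    return max(population, key=fitness)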
Let us assume we have a buggy compiler that generates bad code for an expression of the form <elem> * (<elem> + <elem>). The function has_distributive_law(), above, checks an AST for the presence of this bug. Our aim is to detect this bug via fuzzing. But if we simply generate random inputs from scratch, it may take a long time until we generate the exact combination of operators that triggers the bug.
To have our fuzzers guided by coverage, we first need to measure code coverage. We make use of the Coverage module from the Fuzzing Book, which is particularly easy to use: it simply uses a with clause to obtain coverage from the code in the with body. Here is how to obtain coverage for our has_distributive_law() code, above:
mult_ast = ast.parse("1 * 2")
with Coverage() as cov:
    has_distributive_law(mult_ast)
The coverage() method tells us which lines in the code actually have been reached. This includes lines from has_distributive_law(), but also lines from other functions called.
cov.coverage()
{('_handle_fromlist', 1063), ('_handle_fromlist', 1064), ('_handle_fromlist', 1071), ('_handle_fromlist', 1075), ('_handle_fromlist', 1087), ('has_distributive_law', 2), ('has_distributive_law', 4), ('has_distributive_law', 5), ('has_distributive_law', 6), ('has_distributive_law', 10), ('has_distributive_law', 14), ('iter_child_nodes', 264), ('iter_child_nodes', 265), ('iter_child_nodes', 266), ('iter_child_nodes', 267), ('iter_child_nodes', 268), ('iter_child_nodes', 269), ('iter_child_nodes', 270), ('iter_fields', 252), ('iter_fields', 253), ('iter_fields', 254), ('walk', 378), ('walk', 379), ('walk', 380), ('walk', 381), ('walk', 382), ('walk', 383)}
Which are the lines executed? With a bit of code inspection, we can easily visualize the covered lines:
def show_coverage(cov, fun):
    fun_lines, fun_start = inspect.getsourcelines(fun)
    fun_name = fun.__name__
    coverage = cov.coverage()
    for line in range(len(fun_lines)):
        if (fun_name, line + fun_start) in coverage:
            print('# ', end='')  # covered lines
        else:
            print('  ', end='')  # uncovered lines
        print(line + fun_start, fun_lines[line], end='')
show_coverage(cov, has_distributive_law)
  1 def has_distributive_law(tree) -> bool:
# 2     for node in walk(tree): # iterate over all nodes in `tree`
  3         # print(node)
# 4         if isinstance(node, ast.BinOp):
# 5             if isinstance(node.op, ast.Mult):
# 6                 if isinstance(node.right, ast.BinOp):
  7                     if isinstance(node.right.op, ast.Add):
  8                         return True
  9
# 10                if isinstance(node.left, ast.BinOp):
  11                    if isinstance(node.left.op, ast.Add):
  12                        return True
  13
# 14    return False
In this listing, a # indicates that the code has been executed (covered). We see that our input "1 * 2" satisfies the conditions in Lines 4 and 5, but does not satisfy the conditions in later lines.
Let us now use coverage as a fitness function to guide evolution. The higher the fitness (the coverage), the higher the chances of an input being retained for further evolution. Our ast_fitness() function simply counts the number of lines covered in has_distributive_law().
def ast_fitness(code_ast) -> int:
    with Coverage() as cov:
        has_distributive_law(code_ast)

    lines = set()
    for (name, line) in cov.coverage():
        if name == has_distributive_law.__name__:
            lines.add(line)
    return len(lines)
Here is the fitness of a number of given inputs:
ast_fitness(ast.parse("1"))
3
ast_fitness(ast.parse("1 + 1"))
4
ast_fitness(ast.parse("1 * 2"))
6
ast_fitness(ast.parse("1 * (2 + 3)"))
6
Now, let's set up a fitness function that takes derivation trees. Essentially, our tree_fitness() function is based on the ast_fitness() function, above; however, we also add a small component 1 / len(code_str) to give extra fitness to shorter inputs. Otherwise, our inputs may grow and keep on growing, making mutations inefficient.
def tree_fitness(tree) -> float:
    code_str = str(tree)
    code_ast = ast.fix_missing_locations(eval(code_str))
    fitness = ast_fitness(code_ast)
    # print(ast.unparse(code_ast), f"\n=> Fitness = {fitness}\n")
    return fitness + 1 / len(code_str)
tree_fitness(sum_tree)
4.002666666666666
Let us now make use of our fitness function to implement a simple evolutionary fuzzing algorithm. We start with evolution – that is, taking a population and adding offspring via mutations. Our initial population consists of a single candidate – in our case, sum_tree, reflecting the sum() function, above.
def initial_population(tree):
    return [ (tree, tree_fitness(tree)) ]
sum_population = initial_population(sum_tree)
len(sum_population)
1
Our evolve() function adds two new children to each population member.
OFFSPRING = 2
def evolve(population, min_fitness=-1):
    solver = ISLaSolver(python_ast_grammar)
    for (candidate, _) in list(population):
        for i in range(OFFSPRING):
            child = solver.mutate(candidate, min_mutations=1, max_mutations=1)
            child_fitness = tree_fitness(child)
            if child_fitness > min_fitness:
                population.append((child, child_fitness))
    return population
sum_population = evolve(sum_population)
len(sum_population)
3
As we can evolve all these, too, we get an exponential growth.
sum_population = evolve(sum_population)
len(sum_population)
9
sum_population = evolve(sum_population)
len(sum_population)
27
sum_population = evolve(sum_population)
len(sum_population)
81
sum_population = evolve(sum_population)
len(sum_population)
243
No population can expand forever and still survive. Let us thus limit the population to a certain size.
POPULATION_SIZE = 100
The select() function implements survival of the fittest: it limits the population to at most POPULATION_SIZE elements, sorting them by their fitness (highest to lowest). Members with low fitness beyond POPULATION_SIZE do not survive.
def get_fitness(elem):
    (candidate, fitness) = elem
    return fitness

def select(population):
    population = sorted(population, key=get_fitness, reverse=True)
    population = population[:POPULATION_SIZE]
    return population
We can use the following call to trim our sum_population to the fittest members:
sum_population = select(sum_population)
len(sum_population)
100
We now have everything in place:
- an initial population (sum_population);
- a function to add offspring via mutation (evolve()); and
- a function to keep only the fittest members (select()).
Let us repeat this process over several generations. We track whenever we have found a new "best" candidate and log them. If we find a candidate that triggers the bug, we stop. Note that this may take a long time, and not necessarily yield a perfect result.
As is common in search-based approaches, we stop and restart the search if we have not found a sufficient solution after a number of generations (here: GENERATIONS). Other than that, we keep searching until we have a solution.
GENERATIONS = 100  # Upper bound

trial = 1
found = False

while not found:
    sum_population = initial_population(sum_tree)
    prev_best_fitness = -1

    for generation in range(GENERATIONS):
        sum_population = evolve(sum_population, min_fitness=prev_best_fitness)
        sum_population = select(sum_population)
        best_candidate, best_fitness = sum_population[0]
        if best_fitness > prev_best_fitness:
            print(f"Generation {generation}: found new best candidate (fitness={best_fitness}):")
            best_ast = ast.fix_missing_locations(eval(str(best_candidate)))
            print(ast.unparse(best_ast))
            prev_best_fitness = best_fitness
        if has_distributive_law(best_ast):
            print("Done!")
            found = True
            break

    trial = trial + 1
    print(f"\n\nRestarting; trial #{trial}")
Generation 0: found new best candidate (fitness=4.002666666666666):
def sum(a, b):
    the_sum = a + b
    return the_sum
Generation 1: found new best candidate (fitness=4.0027027027027025):
def sum(a, b):
    the_sum = a + b
    return FE
Generation 4: found new best candidate (fitness=4.002865329512894):
def sum():
    the_sum = a + b
    return the_sum
Generation 5: found new best candidate (fitness=6.00094696969697):
if set()[:] * *set():
    def sum(a, b):
        mc = a + b
        return FE
else:
    M = set()
    continue
    set().f[set():set()]()
Generation 7: found new best candidate (fitness=7.002364066193853):
def sum(a, b):
    mc = (a + b) * ()
    return FE
Done!


Restarting; trial #2
Success! We found a piece of code that triggers the bug. Check it for occurrences of the distributive law.
print(ast.unparse(best_ast))
def sum(a, b):
    mc = (a + b) * ()
    return FE
assert has_distributive_law(best_ast)
You may note that not all of the code is required to trigger the bug. We could run our evolutionary fuzzer a bit longer to see whether it can be further reduced, or use a dedicated input reduction technique such as Delta Debugging.
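For illustration, here is a crude reduction sketch (the name crude_reduce is hypothetical): it greedily drops statements as long as has_distributive_law() still reports the bug. Statement order may change, and a principled approach would use Delta Debugging instead.
import copy

def crude_reduce(tree: ast.Module) -> ast.Module:
    """Greedily drop statements while the bug trigger is retained."""
    reduced = copy.deepcopy(tree)
    for node in ast.walk(reduced):
        body = getattr(node, 'body', None)
        if not isinstance(body, list):
            continue
        for stmt in list(body):
            if len(body) > 1:
                body.remove(stmt)
                if not has_distributive_law(reduced):
                    body.append(stmt)  # removal lost the trigger; put it back
    return reduced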
Could the bug in has_distributive_law() have been found without evolutionary guidance – i.e., simply by applying one mutation to sum()? When producing an expression (<expr>), we can calculate how big the chances are to
- produce a binary operation (<BinOp>),
- have its operator be a multiplication (*), and
- have one of its children be a binary addition (+).
Let's do a few queries on our grammar to compute the chances.
assert '<BinOp>' in python_ast_grammar['<expr>']
len(python_ast_grammar['<expr>'])
15
assert 'Add()' in python_ast_grammar['<operator>']
assert 'Mult()' in python_ast_grammar['<operator>']
len(python_ast_grammar['<operator>'])
13
(len(python_ast_grammar['<expr>']) # chances of choosing a `BinOp`
* len(python_ast_grammar['<operator>']) # chances of choosing a `*`
* len(python_ast_grammar['<expr>']) # chances of choosing a `BinOp` as a child
* len(python_ast_grammar['<operator>']) # chances of choosing a `+`
/ 2) # two chances - one for the left child, one for the right
19012.5
On average, it would take about 19000 (non-evolutionary) runs until we have an expression that triggers the distributive law. So it is definitely better to make use of additional information (say, coverage) in order to guide mutations towards a goal.
This chapter provides a PythonFuzzer class that allows producing arbitrary Python code elements:
fuzzer = PythonFuzzer()
print(fuzzer.fuzz())
def R():
    break
By default, PythonFuzzer produces a function definition – that is, a list of statements as above. You can pass a start_symbol argument to state which Python element you'd like to have:
fuzzer = PythonFuzzer('<While>')
print(fuzzer.fuzz())
while {set()[set():set():set()]}:
    C = set()
    D @= set()
    break
else:
    return
Here is a list of all possible start symbols. Their names reflect the nonterminals from the Python ast module documentation.
sorted(list(PYTHON_AST_GRAMMAR.keys()))
['<Assert>', '<Assign>', '<Attribute>', '<AugAssign>', '<BinOp>', '<BoolOp>', '<Break>', '<Call>', '<Compare>', '<Constant>', '<Continue>', '<Delete>', '<Dict>', '<EmptySet>', '<Expr>', '<For>', '<FunctionDef>', '<If>', '<List>', '<Module>', '<Name>', '<Pass>', '<Return>', '<Set>', '<Slice>', '<Starred>', '<Subscript>', '<Tuple>', '<UnaryOp>', '<While>', '<With>', '<arg>', '<arg_list>', '<args>', '<args_param>', '<arguments>', '<bool>', '<boolop>', '<cmpop>', '<cmpop_list>', '<cmpops>', '<decorator_list_param>', '<defaults_param>', '<digit>', '<digits>', '<expr>', '<expr_list>', '<exprs>', '<float>', '<func>', '<id>', '<id_continue>', '<id_start>', '<identifier>', '<integer>', '<keyword>', '<keyword_list>', '<keywords>', '<keywords_param>', '<kw_defaults_param>', '<kwarg>', '<kwonlyargs_param>', '<lhs_Attribute>', '<lhs_List>', '<lhs_Name>', '<lhs_Starred>', '<lhs_Subscript>', '<lhs_Tuple>', '<lhs_expr>', '<lhs_exprs>', '<literal>', '<mod>', '<none>', '<nonempty_expr_list>', '<nonempty_lhs_expr_list>', '<nonempty_stmt_list>', '<nonzerodigit>', '<not_double_quotes>', '<not_single_quotes>', '<operator>', '<orelse_param>', '<posonlyargs_param>', '<returns>', '<start>', '<stmt>', '<stmt_list>', '<stmts>', '<string>', '<type_comment>', '<type_ignore>', '<type_ignore_list>', '<type_ignore_param>', '<type_ignores>', '<unaryop>', '<vararg>', '<withitem>', '<withitem_list>', '<withitems>']
If you'd like more control over Python code generation, here is what is happening behind the scenes.
The EBNF grammar PYTHON_AST_GRAMMAR can parse and produce abstract syntax trees for Python. To produce a Python module without PythonFuzzer, you would take these steps:
Step 1: Create a non-EBNF grammar suitable for ISLaSolver (or any other grammar fuzzer):
python_ast_grammar = convert_ebnf_grammar(PYTHON_AST_GRAMMAR)
Step 2: Feed the resulting grammar into a grammar fuzzer such as ISLa:
solver = ISLaSolver(python_ast_grammar, start_symbol='<FunctionDef>')
Step 3: Have the grammar fuzzer produce a string. This string represents an AST.
ast_string = str(solver.solve())
ast_string
'FunctionDef(name=\'y\', args=arguments(posonlyargs=[], args=[], kwonlyargs=[], kw_defaults=[], defaults=[]), body=[Return()], decorator_list=[Call(func=Name(id="set", ctx=Load()), args=[], keywords=[])])'
Step 4: Convert the AST into an actual Python AST data structure.
abstract_syntax_tree = eval(ast_string)
Step 5: Finally, convert the AST structure back into readable Python code:
ast.fix_missing_locations(abstract_syntax_tree)
print(ast.unparse(abstract_syntax_tree))
@set()
def y():
    return
The chapter has many more applications, including parsing and mutating Python code, evolutionary fuzzing, and more.
Here are the details on the PythonFuzzer constructor:
# ignore
import inspect
import markdown
from bookutils import HTML
# ignore
sig = inspect.signature(PythonFuzzer.__init__)
sig_str = str(sig) if sig else ""
doc = inspect.getdoc(PythonFuzzer.__init__) or ""
HTML(markdown.markdown('`PythonFuzzer' + sig_str + '`\n\n' + doc))
PythonFuzzer(self, start_symbol: Optional[str] = None, *, grammar: Optional[Dict[str, List[Union[str, Tuple[str, Dict[str, Any]]]]]] = None, constraint: Optional[str] = None, **kw_params) -> None
Produce Python code. Parameters are:
- start_symbol: the grammatical entity to be generated (default: <FunctionDef>);
- grammar: the EBNF grammar to be used (default: PYTHON_AST_GRAMMAR); and
- constraint: an ISLa constraint (if any).
Additional keyword parameters are passed to the ISLaSolver superclass.
# ignore
from ClassDiagram import display_class_hierarchy
# ignore
display_class_hierarchy([PythonFuzzer],
                        public_methods=[
                            PythonFuzzer.__init__,
                            PythonFuzzer.fuzz,
                            ISLaSolver.__init__
                        ],
                        project='fuzzingbook')
The seminal work on compiler testing is Csmith \cite{Yang2011}, a generator of C programs. Csmith has been used to thoroughly test compilers such as Clang or GCC; beyond producing code that is syntactically correct, it also aims at semantic correctness as well as avoiding undefined and unspecified behaviors. This is a must-read for anyone in the field of compiler testing.