INVENTORY = """\
1997,van,Ford,E350
2000,car,Mercury,Cougar
1999,car,Chevy,Venture\
"""


VEHICLES = INVENTORY.split('\n')


class SQLException(Exception):
    pass


class DB:
    def __init__(self, db={}):
        self.db = dict(db)


class DB(DB):
    def create_table(self, table, defs):
        self.db[table] = (defs, [])


class DB(DB):
    def table(self, t_name):
        if t_name in self.db:
            return self.db[t_name]
        raise SQLException('Table (%s) was not found' % repr(t_name))


def sample_db():
    db = DB()
    inventory_def = {'year': int, 'kind': str, 'company': str, 'model': str}
    db.create_table('inventory', inventory_def)
    return db


db = sample_db()
db.table('inventory')

({'year': int, 'kind': str, 'company': str, 'model': str}, [])


class DB(DB):
    def column(self, table_decl, c_name):
        if c_name in table_decl: 
            return table_decl[c_name]
        raise SQLException('Column (%s) was not found' % repr(c_name))


db = sample_db()
decl, rows = db.table('inventory')
db.column(decl, 'year')

int


class DB(DB):
    def do_select(self, query):
        ...

    def do_update(self, query):
        ...

    def do_insert(self, query):
        ...

    def do_delete(self, query):
        ...

    def sql(self, query):
        methods = [('select ', self.do_select),
                   ('update ', self.do_update),
                   ('insert into ', self.do_insert),
                   ('delete from', self.do_delete)]
        for key, method in methods:
            if query.startswith(key):
                return method(query[len(key):])
        raise SQLException('Unknown SQL (%s)' % query)


some_db = DB()
some_db.sql('select year from inventory')


db = DB()


inventory_def = {'year': int, 'kind': str, 'company': str, 'model': str}
db.create_table('inventory', inventory_def)


def update_inventory(sqldb, vehicle):
    inventory_def = sqldb.db['inventory'][0]
    k, v = zip(*inventory_def.items())
    val = [repr(cast(val)) for cast, val in zip(v, vehicle.split(','))]
    sqldb.sql('insert into inventory (%s) values (%s)' % (','.join(k),
                                                          ','.join(val)))


for V in VEHICLES:
    update_inventory(db, V)


db.db

{'inventory': ({'year': int, 'kind': str, 'company': str, 'model': str},
  [{'year': 1997, 'kind': 'van', 'company': 'Ford', 'model': 'E350'},
   {'year': 2000, 'kind': 'car', 'company': 'Mercury', 'model': 'Cougar'},
   {'year': 1999, 'kind': 'car', 'company': 'Chevy', 'model': 'Venture'}])}


db.sql('select year,kind from inventory')

[(1997, 'van'), (2000, 'car'), (1999, 'car')]


db.sql("select company,model from inventory where kind == 'car'")

[('Mercury', 'Cougar'), ('Chevy', 'Venture')]


db.sql("update inventory set year = 1998, company = 'Suzuki' where kind == 'van'")

'1 records were updated'


db.db

{'inventory': ({'year': int, 'kind': str, 'company': str, 'model': str},
  [{'year': 1998, 'kind': 'van', 'company': 'Suzuki', 'model': 'E350'},
   {'year': 2000, 'kind': 'car', 'company': 'Mercury', 'model': 'Cougar'},
   {'year': 1999, 'kind': 'car', 'company': 'Chevy', 'model': 'Venture'}])}


db.sql('select int(year)+10 from inventory')

[2008, 2010, 2009]


db.sql("insert into inventory (year, kind, company, model) values (1, 'charriot', 'Rome', 'Quadriga')")


db.db

{'inventory': ({'year': int, 'kind': str, 'company': str, 'model': str},
  [{'year': 1998, 'kind': 'van', 'company': 'Suzuki', 'model': 'E350'},
   {'year': 2000, 'kind': 'car', 'company': 'Mercury', 'model': 'Cougar'},
   {'year': 1999, 'kind': 'car', 'company': 'Chevy', 'model': 'Venture'},
   {'year': 1, 'kind': 'charriot', 'company': 'Rome', 'model': 'Quadriga'}])}


db.sql("delete from inventory where year < 1900")

'1 records were deleted'


gf = GrammarFuzzer(INVENTORY_GRAMMAR_F)
for _ in range(10):
    query = gf.fuzz()
    print(repr(query))
    try:
        res = db.sql(query)
        print(repr(res))
    except SQLException as e:
        print("> ", e)
        pass
    except:
        traceback.print_exc()
        break
    print()

'select O6fo,-977091.1,-36.46 from inventory'
>  Invalid WHERE ('(O6fo,-977091.1,-36.46)')

'select g3 from inventory where -3.0!=V/g/b+Q*M*G'
>  Invalid WHERE ('(-3.0!=V/g/b+Q*M*G)')

'update inventory set z=a,x=F_,Q=K where p(M)<_*S'
>  Column ('z') was not found

'update inventory set R=L5pk where e*l*y-u>K+U(:)'
>  Column ('R') was not found

'select _/d*Q+H/d(k)<t+M-A+P from inventory'
>  Invalid WHERE ('(_/d*Q+H/d(k)<t+M-A+P)')

'select F5 from inventory'
>  Invalid WHERE ('(F5)')

'update inventory set jWh.=a6 where wcY(M)>IB7(i)'
>  Column ('jWh.') was not found

'update inventory set U=y where L(W<c,(U!=W))<V(((q)==m<F),O,l)'
>  Column ('U') was not found

'delete from inventory where M/b-O*h*E<H-W>e(Y)-P'
>  Invalid WHERE ('M/b-O*h*E<H-W>e(Y)-P')

'select ((kP(86)+b*S+J/Z/U+i(U))) from inventory'
>  Invalid WHERE ('(((kP(86)+b*S+J/Z/U+i(U))))')


db.sql('select year from inventory where year < 2000')

[1998, 1999]


db.sql('select year - 1900 if year < 2000 else year - 2000 from inventory')

[98, 0, 99]


db.sql('select __import__("os").popen("pwd").read() from inventory')

['/Users/zeller/Projects/fuzzingbook/notebooks\n',
 '/Users/zeller/Projects/fuzzingbook/notebooks\n',
 '/Users/zeller/Projects/fuzzingbook/notebooks\n']


class tstr(str):
    """Wrapper for strings, saving taint information"""

    def __new__(cls, value, *args, **kw):
        """Create a tstr() instance. Used internally."""
        return str.__new__(cls, value)

    def __init__(self, value: Any, taint: Any = None, **kwargs) -> None:
        """Constructor.
        `value` is the string value the `tstr` object is to be constructed from.
        `taint` is an (optional) taint to be propagated to derived strings."""
        self.taint: Any = taint


class tstr(tstr):
    def __repr__(self) -> tstr:
        """Return a representation."""
        return tstr(str.__repr__(self), taint=self.taint)


class tstr(tstr):
    def __str__(self) -> str:
        """Convert to string"""
        return str.__str__(self)


thello: tstr = tstr('hello', taint='LOW')


thello.taint

'LOW'


repr(thello).taint  # type: ignore

'LOW'


class tstr(tstr):
    def clear_taint(self):
        """Remove taint"""
        self.taint = None
        return self

    def has_taint(self):
        """Check if taint is present"""
        return self.taint is not None


class tstr(tstr):
    def create(self, s):
        return tstr(s, taint=self.taint)


class tstr(tstr):
    @staticmethod
    def make_str_wrapper(fun):
        """Make `fun` (a `str` method) a method in `tstr`"""
        def proxy(self, *args, **kwargs):
            res = fun(self, *args, **kwargs)
            return self.create(res)

        if hasattr(fun, '__doc__'):
            # Copy docstring
            proxy.__doc__ = fun.__doc__

        return proxy


def informationflow_init_1():
    for name in ['__format__', '__mod__', '__rmod__', '__getitem__',
                 '__add__', '__mul__', '__rmul__',
                 'capitalize', 'casefold', 'center', 'encode',
                 'expandtabs', 'format', 'format_map', 'join',
                 'ljust', 'lower', 'lstrip', 'replace',
                 'rjust', 'rstrip', 'strip', 'swapcase', 'title', 'translate', 'upper']:
        fun = getattr(str, name)
        setattr(tstr, name, tstr.make_str_wrapper(fun))


informationflow_init_1()


INITIALIZER_LIST = [informationflow_init_1]


def initialize():
    for fn in INITIALIZER_LIST:
        fn()


class tstr(tstr):
    def __radd__(self, value):
        """Return value + self, as a `tstr` object"""
        return self.create(value + str(self))


thello = tstr('hello', taint='LOW')


thello[0].taint  # type: ignore

'LOW'


thello[1:3].taint  # type: ignore

'LOW'


(tstr('foo', taint='HIGH') + 'bar').taint  # type: ignore

'HIGH'


('foo' + tstr('bar', taint='HIGH')).taint  # type: ignore

'HIGH'


thello += ', world'  # type: ignore


thello.taint  # type: ignore

'LOW'


(thello * 5).taint  # type: ignore

'LOW'


('hw %s' % thello).taint  # type: ignore

'LOW'


(tstr('hello %s', taint='HIGH') % 'world').taint  # type: ignore

'HIGH'


class TrustedDB(DB):
    def sql(self, s):
        assert isinstance(s, tstr), "Need a tainted string"
        assert s.taint == 'TRUSTED', "Need a string with trusted taint"
        return super().sql(s)


bdb = TrustedDB(db.db)


with ExpectError():
    bdb.sql("select year from INVENTORY")

Traceback (most recent call last):
  File "/var/folders/n2/xd9445p97rb3xh7m1dfx8_4h0006ts/T/ipykernel_26592/3935989889.py", line 2, in <cell line: 1>
    bdb.sql("select year from INVENTORY")
  File "/var/folders/n2/xd9445p97rb3xh7m1dfx8_4h0006ts/T/ipykernel_26592/995123203.py", line 3, in sql
    assert isinstance(s, tstr), "Need a tainted string"
AssertionError: Need a tainted string (expected)


bad_user_input = tstr('__import__("os").popen("ls").read()', taint='UNTRUSTED')
with ExpectError():
    bdb.sql(bad_user_input)

Traceback (most recent call last):
  File "/var/folders/n2/xd9445p97rb3xh7m1dfx8_4h0006ts/T/ipykernel_26592/3307042773.py", line 3, in <cell line: 2>
    bdb.sql(bad_user_input)
  File "/var/folders/n2/xd9445p97rb3xh7m1dfx8_4h0006ts/T/ipykernel_26592/995123203.py", line 4, in sql
    assert s.taint == 'TRUSTED', "Need a string with trusted taint"
AssertionError: Need a string with trusted taint (expected)


def sanitize(user_input):
    assert isinstance(user_input, tstr)
    if re.match(
            r'^select +[-a-zA-Z0-9_, ()]+ from +[-a-zA-Z0-9_, ()]+$', user_input):
        return tstr(user_input, taint='TRUSTED')
    else:
        return tstr('', taint='UNTRUSTED')


good_user_input = tstr("select year,model from inventory", taint='UNTRUSTED')
sanitized_input = sanitize(good_user_input)
sanitized_input

'select year,model from inventory'


sanitized_input.taint

'TRUSTED'


bdb.sql(sanitized_input)

[(1998, 'E350'), (2000, 'Cougar'), (1999, 'Venture')]


sanitized_input = sanitize(bad_user_input)
sanitized_input

''


sanitized_input.taint

'UNTRUSTED'


with ExpectError():
    bdb.sql(sanitized_input)

Traceback (most recent call last):
  File "/var/folders/n2/xd9445p97rb3xh7m1dfx8_4h0006ts/T/ipykernel_26592/249000876.py", line 2, in <cell line: 1>
    bdb.sql(sanitized_input)
  File "/var/folders/n2/xd9445p97rb3xh7m1dfx8_4h0006ts/T/ipykernel_26592/995123203.py", line 4, in sql
    assert s.taint == 'TRUSTED', "Need a string with trusted taint"
AssertionError: Need a string with trusted taint (expected)


class Tainted(Exception):
    def __init__(self, v):
        self.v = v

    def __str__(self):
        return 'Tainted[%s]' % self.v


class TaintedDB(DB):
    def my_eval(self, statement, g, l):
        if statement.taint != 'TRUSTED':
            raise Tainted(statement)
        try:
            return eval(statement, g, l)
        except:
            raise SQLException('Invalid SQL (%s)' % repr(statement))


tdb = TaintedDB()


tdb.db = db.db


for _ in range(10):
    query = gf.fuzz()
    print(repr(query))
    try:
        res = tdb.sql(tstr(query, taint='UNTRUSTED'))
        print(repr(res))
    except SQLException as e:
        pass
    except Tainted as e:
        print("> ", e)
    except:
        traceback.print_exc()
        break
    print()

'delete from inventory where y/u-l+f/y<Y(c)/A-H*q'
>  Tainted[y/u-l+f/y<Y(c)/A-H*q]

"insert into inventory (G,Wmp,sl3hku3) values ('<','?')"

"insert into inventory (d0) values (',_G')"

'select P*Q-w/x from inventory where X<j==:==j*r-f'
>  Tainted[(X<j==:==j*r-f)]

'select a>F*i from inventory where Q/I-_+P*j>.'
>  Tainted[(Q/I-_+P*j>.)]

'select (V-i<T/g) from inventory where T/r/G<FK(m)/(i)'
>  Tainted[(T/r/G<FK(m)/(i))]

'select (((i))),_(S,_)/L-k<H(Sv,R,n,W,Y) from inventory'
>  Tainted[((((i))),_(S,_)/L-k<H(Sv,R,n,W,Y))]

'select (N==c*U/P/y),i-e/n*y,T!=w,u from inventory'
>  Tainted[((N==c*U/P/y),i-e/n*y,T!=w,u)]

'update inventory set _=B,n=v where o-p*k-J>T'

'select s from inventory where w4g4<.m(_)/_>t'
>  Tainted[(w4g4<.m(_)/_>t)]


secrets = tstr('<Plenty of secret keys>', taint='SECRET')


secrets[1:3].taint  # type: ignore

'SECRET'


user_input = "hello"
reply = user_input


isinstance(reply, tstr)

False


reply = user_input + secrets[0:5]


reply

'hello<Plen'


reply.taint  # type: ignore

'SECRET'


def send_back(s):
    assert not isinstance(s, tstr) and not s.taint == 'SECRET'  # type: ignore
    ...


with ExpectError():
    send_back(reply)

Traceback (most recent call last):
  File "/var/folders/n2/xd9445p97rb3xh7m1dfx8_4h0006ts/T/ipykernel_26592/3747050841.py", line 2, in <cell line: 1>
    send_back(reply)
  File "/var/folders/n2/xd9445p97rb3xh7m1dfx8_4h0006ts/T/ipykernel_26592/3158733057.py", line 2, in send_back
    assert not isinstance(s, tstr) and not s.taint == 'SECRET'  # type: ignore
AssertionError (expected)


reply = heartbeat('hello', 5, memory=secrets)


reply.taint  # type: ignore

'SECRET'

# Store reply in memory
    memory = reply + memory[len(reply):]


thilo = tstr("High", taint='HIGH') + tstr("Low", taint='LOW')


thilo

'HighLow'


thilo.taint  # type: ignore

'HIGH'


class ostr(str):
    """Wrapper for strings, saving taint and origin information"""
    DEFAULT_ORIGIN = 0

    def __new__(cls, value, *args, **kw):
        """Create an ostr() instance. Used internally."""
        return str.__new__(cls, value)

    def __init__(self, value: Any, taint: Any = None,
                 origin: Optional[Union[int, List[int]]] = None, **kwargs) -> None:
        """Constructor.
        `value` is the string value the `ostr` object is to be constructed from.
        `taint` is an (optional) taint to be propagated to derived strings.
        `origin` (optional) is either
        - an integer denoting the index of the first character in `value`, or
        - a list of integers denoting the origins of the characters in `value`,
        """
        self.taint = taint

        if origin is None:
            origin = ostr.DEFAULT_ORIGIN
        if isinstance(origin, int):
            self.origin = list(range(origin, origin + len(self)))
        else:
            self.origin = origin
        assert len(self.origin) == len(self)


class ostr(ostr):
    def create(self, s):
        return ostr(s, taint=self.taint, origin=self.origin)


class ostr(ostr):
    UNKNOWN_ORIGIN = -1

    def __repr__(self):
        # handle escaped chars
        origin = [ostr.UNKNOWN_ORIGIN]
        for s, o in zip(str(self), self.origin):
            origin.extend([o] * (len(repr(s)) - 2))

        origin.append(ostr.UNKNOWN_ORIGIN)
        return ostr(str.__repr__(self), taint=self.taint, origin=origin)


class ostr(ostr):
    def __str__(self):
        return str.__str__(self)


othello = ostr('hello')
assert othello.origin == [0, 1, 2, 3, 4]


tworld = ostr('world', origin=6)
assert tworld.origin == [6, 7, 8, 9, 10]


a = ostr("hello\tworld")


repr(a).origin  # type: ignore

[-1, 0, 1, 2, 3, 4, 5, 5, 6, 7, 8, 9, 10, -1]


assert type(str(othello)) == str


repr(othello)

"'hello'"


repr(othello).origin  # type: ignore

[-1, 0, 1, 2, 3, 4, -1]


class ostr(ostr):
    def clear_taint(self):
        self.taint = None
        return self

    def has_taint(self):
        return self.taint is not None


class ostr(ostr):
    def clear_origin(self):
        self.origin = [self.UNKNOWN_ORIGIN] * len(self)
        return self

    def has_origin(self):
        return any(origin != self.UNKNOWN_ORIGIN for origin in self.origin)


othello = ostr('Hello')
assert othello.has_origin()


othello.clear_origin()
assert not othello.has_origin()


s = ostr("hello", origin=100)
s[1]

'e'


s[1].origin

[101]


set(s[1].origin) <= set(s.origin)

True


t = ostr("world", origin=200)


set(s.origin) <= set(t.origin)

False


u = s + t + "!"


u.origin

[100, 101, 102, 103, 104, 200, 201, 202, 203, 204, -1]


ostr.UNKNOWN_ORIGIN in u.origin

True


SECRET_ORIGIN = 1000


secret = ostr('<again, some super-secret input>', origin=SECRET_ORIGIN)


print(secret.origin)

[1000, 1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009, 1010, 1011, 1012, 1013, 1014, 1015, 1016, 1017, 1018, 1019, 1020, 1021, 1022, 1023, 1024, 1025, 1026, 1027, 1028, 1029, 1030, 1031]


hello_s = heartbeat('hello', 5, memory=secret)
hello_s

'hello'


assert isinstance(hello_s, ostr)


print(hello_s.origin)

[-1, -1, -1, -1, -1]


assert hello_s.origin == [ostr.UNKNOWN_ORIGIN] * len(hello_s)


assert all(origin == ostr.UNKNOWN_ORIGIN for origin in hello_s.origin)


assert not any(origin >= SECRET_ORIGIN for origin in hello_s.origin)


hello_s = heartbeat('hello', 32, memory=secret)
hello_s

'hellon, some super-secret input>'


assert isinstance(hello_s, ostr)


print(hello_s.origin)

[-1, -1, -1, -1, -1, 1005, 1006, 1007, 1008, 1009, 1010, 1011, 1012, 1013, 1014, 1015, 1016, 1017, 1018, 1019, 1020, 1021, 1022, 1023, 1024, 1025, 1026, 1027, 1028, 1029, 1030, 1031]


with ExpectError():
    assert hello_s.origin == [ostr.UNKNOWN_ORIGIN] * len(hello_s)

Traceback (most recent call last):
  File "/var/folders/n2/xd9445p97rb3xh7m1dfx8_4h0006ts/T/ipykernel_26592/2698516187.py", line 2, in <cell line: 1>
    assert hello_s.origin == [ostr.UNKNOWN_ORIGIN] * len(hello_s)
AssertionError (expected)


with ExpectError():
    assert all(origin == ostr.UNKNOWN_ORIGIN for origin in hello_s.origin)

Traceback (most recent call last):
  File "/var/folders/n2/xd9445p97rb3xh7m1dfx8_4h0006ts/T/ipykernel_26592/1358366226.py", line 2, in <cell line: 1>
    assert all(origin == ostr.UNKNOWN_ORIGIN for origin in hello_s.origin)
AssertionError (expected)


with ExpectError():
    assert not any(origin >= SECRET_ORIGIN for origin in hello_s.origin)

Traceback (most recent call last):
  File "/var/folders/n2/xd9445p97rb3xh7m1dfx8_4h0006ts/T/ipykernel_26592/1577803914.py", line 2, in <cell line: 1>
    assert not any(origin >= SECRET_ORIGIN for origin in hello_s.origin)
AssertionError (expected)


class TrackingDB(TaintedDB):
    def my_eval(self, statement, g, l):
        if statement.origin:
            raise Tainted(statement)
        try:
            return eval(statement, g, l)
        except:
            raise SQLException('Invalid SQL (%s)' % repr(statement))


class TaintedGrammarFuzzer(GrammarFuzzer):
    def __init__(self,
                 grammar,
                 start_symbol=START_SYMBOL,
                 expansion_switch=1,
                 log=False):
        self.tainted_start_symbol = ostr(
            start_symbol, origin=[1] * len(start_symbol))
        self.expansion_switch = expansion_switch
        self.log = log
        self.grammar = grammar
        self.c_grammar = canonical(grammar)
        self.init_tainted_grammar()

    def expansion_cost(self, expansion, seen=set()):
        symbols = [e for e in expansion if e in self.c_grammar]
        if len(symbols) == 0:
            return 1

        if any(s in seen for s in symbols):
            return float('inf')

        return sum(self.symbol_cost(s, seen) for s in symbols) + 1

    def fuzz_tree(self):
        tree = (self.tainted_start_symbol, [])
        nt_leaves = [tree]
        expansion_trials = 0
        while nt_leaves:
            idx = random.randint(0, len(nt_leaves) - 1)
            key, children = nt_leaves[idx]
            expansions = self.ct_grammar[key]
            if expansion_trials < self.expansion_switch:
                expansion = random.choice(expansions)
            else:
                costs = [self.expansion_cost(e) for e in expansions]
                m = min(costs)
                all_min = [i for i, c in enumerate(costs) if c == m]
                expansion = expansions[random.choice(all_min)]

            new_leaves = [(token, []) for token in expansion]
            new_nt_leaves = [e for e in new_leaves if e[0] in self.ct_grammar]
            children[:] = new_leaves
            nt_leaves[idx:idx + 1] = new_nt_leaves
            if self.log:
                print("%-40s" % (key + " -> " + str(expansion)))
            expansion_trials += 1
        return tree

    def fuzz(self):
        self.derivation_tree = self.fuzz_tree()
        return self.tree_to_string(self.derivation_tree)


class TaintedGrammarFuzzer(TaintedGrammarFuzzer):
    def init_tainted_grammar(self):
        key_increment, alt_increment, token_increment = 1000, 100, 10
        key_origin = key_increment
        self.ct_grammar = {}
        for key, val in self.c_grammar.items():
            key_origin += key_increment
            os = []
            for v in val:
                ts = []
                key_origin += alt_increment
                for t in v:
                    nt = ostr(t, origin=key_origin)
                    key_origin += token_increment
                    ts.append(nt)
                os.append(ts)
            self.ct_grammar[key] = os

        # a use tracking grammar
        self.ctp_grammar = {}
        for key, val in self.ct_grammar.items():
            self.ctp_grammar[key] = [(v, dict(use=0)) for v in val]


trdb = TrackingDB(db.db)


class TaintedGrammarFuzzer(TaintedGrammarFuzzer):
    def tree_to_string(self, tree):
        symbol, children, *_ = tree
        e = ostr('')
        if children:
            return e.join([self.tree_to_string(c) for c in children])
        else:
            return e if symbol in self.c_grammar else symbol


class TaintedGrammarFuzzer(TaintedGrammarFuzzer):
    def update_grammar(self, origin, dtree):
        def update_tree(dtree, origin):
            key, children = dtree
            if children:
                updated_children = [update_tree(c, origin) for c in children]
                corigin = set.union(
                    *[o for (key, children, o) in updated_children])
                corigin = corigin.union(set(key.origin))
                return (key, children, corigin)
            else:
                my_origin = set(key.origin).intersection(origin)
                return (key, [], my_origin)

        key, children, oset = update_tree(dtree, set(origin))
        for key, alts in self.ctp_grammar.items():
            for alt, o in alts:
                alt_origins = set([i for token in alt for i in token.origin])
                if alt_origins.intersection(oset):
                    o['use'] += 1


def tree_type(tree):
    key, children = tree
    return (type(key), key, [tree_type(c) for c in children])


tgf = TaintedGrammarFuzzer(INVENTORY_GRAMMAR_F)
x = None
for _ in range(10):
    qtree = tgf.fuzz_tree()
    query = tgf.tree_to_string(qtree)
    assert isinstance(query, ostr)
    try:
        print(repr(query))
        res = trdb.sql(query)
        print(repr(res))
    except SQLException as e:
        print(e)
    except Tainted as e:
        print(e)
        origin = e.args[0].origin
        tgf.update_grammar(origin, qtree)
    except:
        traceback.print_exc()
        break
    print()

'select (g!=(9)!=((:)==2==9)!=J)==-7 from inventory'
Tainted[((g!=(9)!=((:)==2==9)!=J)==-7)]

'delete from inventory where ((c)==T)!=5==(8!=Y)!=-5'
Tainted[((c)==T)!=5==(8!=Y)!=-5]

'select (((w==(((X!=------8)))))) from inventory'
Tainted[((((w==(((X!=------8)))))))]

'delete from inventory where ((.==(-3)!=(((-3))))!=(S==(((n))==Y))!=--2!=N==-----0==--0)!=(((((R))))==((v)))!=((((((------2==Q==-8!=(q)!=(((.!=2))==J)!=(1)!=(((-4!=--5==J!=(((A==.)))))!=(((((0==(P!=((R))!=(((j)))!=7))))==O==K))==(q))==--1==((H)==(t)==s!=-6==((y))==R)!=((H))!=W==--4==(P==(u)==-0)!=O==((-5==-------2!=4!=U))!=-1==((((((R!=-6))))))!=1!=Z)))==(((I)!=((S))!=(-4==s)==(7!=(A))==(s)==p==((_)!=(C))==((w)))))))'
Tainted[((.==(-3)!=(((-3))))!=(S==(((n))==Y))!=--2!=N==-----0==--0)!=(((((R))))==((v)))!=((((((------2==Q==-8!=(q)!=(((.!=2))==J)!=(1)!=(((-4!=--5==J!=(((A==.)))))!=(((((0==(P!=((R))!=(((j)))!=7))))==O==K))==(q))==--1==((H)==(t)==s!=-6==((y))==R)!=((H))!=W==--4==(P==(u)==-0)!=O==((-5==-------2!=4!=U))!=-1==((((((R!=-6))))))!=1!=Z)))==(((I)!=((S))!=(-4==s)==(7!=(A))==(s)==p==((_)!=(C))==((w)))))))]

'delete from inventory where ((2)==T!=-1)==N==(P)==((((((6==a)))))!=8)==(3)!=((---7))'
Tainted[((2)==T!=-1)==N==(P)==((((((6==a)))))!=8)==(3)!=((---7))]

'delete from inventory where o!=2==---5==3!=t'
Tainted[o!=2==---5==3!=t]

'select (2) from inventory'
Tainted[((2))]

'select _ from inventory'
Tainted[(_)]

'select L!=(((1!=(Z)==C)!=C))==(((-0==-5==Q!=((--2!=(-0)==((0))==M)==(A))!=(X)!=e==(K==((b)))!=b==9==((((l)!=-7!=4)!=s==G))!=6==((((5==(((v==(((((((a!=d))==0!=4!=(4)==--1==(h)==-8!=(9)==-4)))))!=I!=-4))==v!=(Y==b)))==(a))!=((7)))))))==((4)) from inventory'
Tainted[(L!=(((1!=(Z)==C)!=C))==(((-0==-5==Q!=((--2!=(-0)==((0))==M)==(A))!=(X)!=e==(K==((b)))!=b==9==((((l)!=-7!=4)!=s==G))!=6==((((5==(((v==(((((((a!=d))==0!=4!=(4)==--1==(h)==-8!=(9)==-4)))))!=I!=-4))==v!=(Y==b)))==(a))!=((7)))))))==((4)))]

'delete from inventory where _==(7==(9)!=(---5)==1)==-8'
Tainted[_==(7==(9)!=(---5)==1)==-8]


tgf.ctp_grammar

{'<start>': [(['<query>'], {'use': 10})],
 '<expr>': [(['<bexpr>'], {'use': 8}),
  (['<aexpr>'], {'use': 8}),
  (['(', '<expr>', ')'], {'use': 8}),
  (['<term>'], {'use': 10})],
 '<bexpr>': [(['<aexpr>', '<lt>', '<aexpr>'], {'use': 0}),
  (['<aexpr>', '<gt>', '<aexpr>'], {'use': 0}),
  (['<expr>', '==', '<expr>'], {'use': 8}),
  (['<expr>', '!=', '<expr>'], {'use': 8})],
 '<aexpr>': [(['<aexpr>', '+', '<aexpr>'], {'use': 0}),
  (['<aexpr>', '-', '<aexpr>'], {'use': 0}),
  (['<aexpr>', '*', '<aexpr>'], {'use': 0}),
  (['<aexpr>', '/', '<aexpr>'], {'use': 0}),
  (['<word>', '(', '<exprs>', ')'], {'use': 0}),
  (['<expr>'], {'use': 8})],
 '<exprs>': [(['<expr>', ',', '<exprs>'], {'use': 0}),
  (['<expr>'], {'use': 5})],
 '<lt>': [(['<'], {'use': 0})],
 '<gt>': [(['>'], {'use': 0})],
 '<term>': [(['<number>'], {'use': 9}), (['<word>'], {'use': 9})],
 '<number>': [(['<integer>', '.', '<integer>'], {'use': 0}),
  (['<integer>'], {'use': 9}),
  (['-', '<number>'], {'use': 8})],
 '<integer>': [(['<digit>', '<integer>'], {'use': 0}),
  (['<digit>'], {'use': 9})],
 '<word>': [(['<word>', '<letter>'], {'use': 0}),
  (['<word>', '<digit>'], {'use': 0}),
  (['<letter>'], {'use': 9})],
 '<digit>': [(['0'], {'use': 2}),
  (['1'], {'use': 4}),
  (['2'], {'use': 6}),
  (['3'], {'use': 3}),
  (['4'], {'use': 2}),
  (['5'], {'use': 5}),
  (['6'], {'use': 3}),
  (['7'], {'use': 5}),
  (['8'], {'use': 6}),
  (['9'], {'use': 3})],
 '<letter>': [(['a'], {'use': 2}),
  (['b'], {'use': 1}),
  (['c'], {'use': 1}),
  (['d'], {'use': 1}),
  (['e'], {'use': 1}),
  (['f'], {'use': 0}),
  (['g'], {'use': 1}),
  (['h'], {'use': 1}),
  (['i'], {'use': 0}),
  (['j'], {'use': 1}),
  (['k'], {'use': 0}),
  (['l'], {'use': 1}),
  (['m'], {'use': 0}),
  (['n'], {'use': 1}),
  (['o'], {'use': 1}),
  (['p'], {'use': 1}),
  (['q'], {'use': 1}),
  (['r'], {'use': 0}),
  (['s'], {'use': 2}),
  (['t'], {'use': 2}),
  (['u'], {'use': 1}),
  (['v'], {'use': 2}),
  (['w'], {'use': 2}),
  (['x'], {'use': 0}),
  (['y'], {'use': 1}),
  (['z'], {'use': 0}),
  (['A'], {'use': 2}),
  (['B'], {'use': 0}),
  (['C'], {'use': 2}),
  (['D'], {'use': 0}),
  (['E'], {'use': 0}),
  (['F'], {'use': 0}),
  (['G'], {'use': 1}),
  (['H'], {'use': 1}),
  (['I'], {'use': 2}),
  (['J'], {'use': 2}),
  (['K'], {'use': 2}),
  (['L'], {'use': 1}),
  (['M'], {'use': 1}),
  (['N'], {'use': 2}),
  (['O'], {'use': 1}),
  (['P'], {'use': 2}),
  (['Q'], {'use': 2}),
  (['R'], {'use': 1}),
  (['S'], {'use': 1}),
  (['T'], {'use': 2}),
  (['U'], {'use': 1}),
  (['V'], {'use': 0}),
  (['W'], {'use': 1}),
  (['X'], {'use': 2}),
  (['Y'], {'use': 3}),
  (['Z'], {'use': 2}),
  (['_'], {'use': 3}),
  ([':'], {'use': 1}),
  (['.'], {'use': 1})],
 '<query>': [(['select ', '<exprs>', ' from ', '<table>'], {'use': 5}),
  (['select ', '<exprs>', ' from ', '<table>', ' where ', '<bexpr>'],
   {'use': 0}),
  (['insert into ',
    '<table>',
    ' (',
    '<names>',
    ') values (',
    '<literals>',
    ')'],
   {'use': 0}),
  (['update ', '<table>', ' set ', '<assignments>', ' where ', '<bexpr>'],
   {'use': 0}),
  (['delete from ', '<table>', ' where ', '<bexpr>'], {'use': 5})],
 '<table>': [(['inventory'], {'use': 0})],
 '<names>': [(['<column>', ',', '<names>'], {'use': 0}),
  (['<column>'], {'use': 0})],
 '<column>': [(['<word>'], {'use': 0})],
 '<literals>': [(['<literal>'], {'use': 0}),
  (['<literal>', ',', '<literals>'], {'use': 0})],
 '<literal>': [(['<number>'], {'use': 0}),
  (["'", '<chars>', "'"], {'use': 0})],
 '<assignments>': [(['<kvp>', ',', '<assignments>'], {'use': 0}),
  (['<kvp>'], {'use': 0})],
 '<kvp>': [(['<column>', '=', '<value>'], {'use': 0})],
 '<value>': [(['<word>'], {'use': 0})],
 '<chars>': [(['<char>'], {'use': 0}), (['<char>', '<chars>'], {'use': 0})],
 '<char>': [(['0'], {'use': 0}),
  (['1'], {'use': 0}),
  (['2'], {'use': 0}),
  (['3'], {'use': 0}),
  (['4'], {'use': 0}),
  (['5'], {'use': 0}),
  (['6'], {'use': 0}),
  (['7'], {'use': 0}),
  (['8'], {'use': 0}),
  (['9'], {'use': 0}),
  (['a'], {'use': 0}),
  (['b'], {'use': 0}),
  (['c'], {'use': 0}),
  (['d'], {'use': 0}),
  (['e'], {'use': 0}),
  (['f'], {'use': 0}),
  (['g'], {'use': 0}),
  (['h'], {'use': 0}),
  (['i'], {'use': 0}),
  (['j'], {'use': 0}),
  (['k'], {'use': 0}),
  (['l'], {'use': 0}),
  (['m'], {'use': 0}),
  (['n'], {'use': 0}),
  (['o'], {'use': 0}),
  (['p'], {'use': 0}),
  (['q'], {'use': 0}),
  (['r'], {'use': 0}),
  (['s'], {'use': 0}),
  (['t'], {'use': 0}),
  (['u'], {'use': 0}),
  (['v'], {'use': 0}),
  (['w'], {'use': 0}),
  (['x'], {'use': 0}),
  (['y'], {'use': 0}),
  (['z'], {'use': 0}),
  (['A'], {'use': 0}),
  (['B'], {'use': 0}),
  (['C'], {'use': 0}),
  (['D'], {'use': 0}),
  (['E'], {'use': 0}),
  (['F'], {'use': 0}),
  (['G'], {'use': 0}),
  (['H'], {'use': 0}),
  (['I'], {'use': 0}),
  (['J'], {'use': 0}),
  (['K'], {'use': 0}),
  (['L'], {'use': 0}),
  (['M'], {'use': 0}),
  (['N'], {'use': 0}),
  (['O'], {'use': 0}),
  (['P'], {'use': 0}),
  (['Q'], {'use': 0}),
  (['R'], {'use': 0}),
  (['S'], {'use': 0}),
  (['T'], {'use': 0}),
  (['U'], {'use': 0}),
  (['V'], {'use': 0}),
  (['W'], {'use': 0}),
  (['X'], {'use': 0}),
  (['Y'], {'use': 0}),
  (['Z'], {'use': 0}),
  (['!'], {'use': 0}),
  (['#'], {'use': 0}),
  (['$'], {'use': 0}),
  (['%'], {'use': 0}),
  (['&'], {'use': 0}),
  (['('], {'use': 0}),
  ([')'], {'use': 0}),
  (['*'], {'use': 0}),
  (['+'], {'use': 0}),
  ([','], {'use': 0}),
  (['-'], {'use': 0}),
  (['.'], {'use': 0}),
  (['/'], {'use': 0}),
  ([':'], {'use': 0}),
  ([';'], {'use': 0}),
  (['='], {'use': 0}),
  (['?'], {'use': 0}),
  (['@'], {'use': 0}),
  (['['], {'use': 0}),
  (['\\'], {'use': 0}),
  ([']'], {'use': 0}),
  (['^'], {'use': 0}),
  (['_'], {'use': 0}),
  (['`'], {'use': 0}),
  (['{'], {'use': 0}),
  (['|'], {'use': 0}),
  (['}'], {'use': 0}),
  (['~'], {'use': 0}),
  ([' '], {'use': 0}),
  (['<lt>'], {'use': 0}),
  (['<gt>'], {'use': 0})]}


def strip_all_info(s):
    t = ""
    for c in s:
        t += chr(ord(c))
    return t


othello = ostr("Secret")
othello

'Secret'


othello.origin  # type: ignore

[0, 1, 2, 3, 4, 5]


thello_stripped = strip_all_info(thello)
thello_stripped

'hello, world'


with ExpectError():
    thello_stripped.origin

Traceback (most recent call last):
  File "/var/folders/n2/xd9445p97rb3xh7m1dfx8_4h0006ts/T/ipykernel_26592/588526133.py", line 2, in <cell line: 1>
    thello_stripped.origin
AttributeError: 'str' object has no attribute 'origin' (expected)


hello = ostr('hello', origin=100)
world = ostr('world', origin=200)
(hello + ' ' + world).origin

[100, 101, 102, 103, 104, -1, 200, 201, 202, 203, 204]


with ExpectError():
    ''.join([hello, ' ', world]).origin  # type: ignore

Traceback (most recent call last):
  File "/var/folders/n2/xd9445p97rb3xh7m1dfx8_4h0006ts/T/ipykernel_26592/2341342688.py", line 2, in <cell line: 1>
    ''.join([hello, ' ', world]).origin  # type: ignore
AttributeError: 'str' object has no attribute 'origin' (expected)


def strip_all_info_again(s):
    t = ""
    for c in s:
        if c == 'a':
            t += 'a'
        elif c == 'b':
            t += 'b'
        elif c == 'c':
            t += 'c'
    ...


thello = tstr('hello', taint='LOW')


thello[:4]

'hell'


thello.taint

'LOW'


thello[1:2].taint  # type: ignore

'LOW'


# ignore
from ClassDiagram import display_class_hierarchy
display_class_hierarchy(tstr)


secret = ostr("joshua1234", origin=100, taint='SECRET')


secret.origin

[100, 101, 102, 103, 104, 105, 106, 107, 108, 109]


secret.taint

'SECRET'


secret_substr = (secret[0:4] + "-" + secret[6:])
secret_substr.taint

'SECRET'


secret_substr.origin

[100, 101, 102, 103, -1, 106, 107, 108, 109]


# ignore
display_class_hierarchy(ostr)

x = tint(42, taint='SECRET')
assert x.taint == 'SECRET'

y = x + 1
assert y.taint == 'SECRET'

x_s = repr(x)
assert x_s.taint == 'SECRET'

password = tstr('1234', taint='NOT_EXACTLY_SECRET')
x = tint(password)
assert x == 1234
assert x.taint == 'NOT_EXACTLY_SECRET'

Tracking Information Flow¶

A Vulnerable Database¶

Representing Tables¶

Executing SQL Statements¶

Fuzzing SQL¶

The Evil of Eval¶

Tracking String Taints¶

A Class for Tainted Strings¶

String Operators¶

Tracking Untrusted Input¶

Taint Aware Fuzzing¶

TaintedDB¶

Preventing Privacy Leaks¶

Tracking Individual Characters¶

A Class for Tracking Character Origins¶

Checking Origins¶

Privacy Leaks Revisited¶

Taint-Directed Fuzzing¶

TrackingDB¶

TaintedGrammarFuzzer¶

The Limits of Taint Tracking¶

Conversions¶

Internal C libraries¶

Implicit Information Flow¶

Enforcing Tainting¶

Synopsis¶

Tracking String Taints¶

Tracking Character Origins¶

Lessons Learned¶

Next Steps¶

Background¶

Exercises¶

Exercise 1: Tainted Numbers¶

Part 1: Creation¶

Part 2: Arithmetic expressions¶

Part 3: Passing taints from integers to strings¶

Part 4: Passing taints from strings to integers¶

Exercise 2: Information Flow Testing¶