Module `gdascore.gdaAttack`

Expand source code

import re
import subprocess

import coloredlogs, logging
import sqlite3
import simplejson
import psycopg2
import queue
import threading
import sys
import os
import copy
import base64
import time
import pprint
import datetime
import signal
import atexit
import random
import requests
import functools


coloredlogs.DEFAULT_FIELD_STYLES['asctime'] = {}
coloredlogs.DEFAULT_FIELD_STYLES['levelname'] = {'bold': True, 'color': 'white', 'bright': True}
coloredlogs.DEFAULT_LEVEL_STYLES['info'] = {'color': 'cyan', 'bright': True}
coloredlogs.install(
        fmt="[%(levelname)s] %(message)s (%(filename)s, %(funcName)s(), line %(lineno)d, %(asctime)s)",
        datefmt='%Y-%m-%d %H:%M',
        level=logging.INFO,
)
# logging.basicConfig(
#         format="[%(levelname)s] %(message)s (%(filename)s, %(funcName)s(), line %(lineno)d, %(asctime)s)",
#         datefmt='%Y-%m-%d %H:%M',
#         level=logging.INFO,
#     )

# for pdoc documentation
__all__ = ["gdaAttack"]

try:
    from .gdaTools import getInterpolatedValue, getDatabaseInfo
    from .dupCheck import DupCheck
except ImportError:
    from gdaTools import getInterpolatedValue, getDatabaseInfo
    from dupCheck import DupCheck

theCacheQueue = None
theCacheThreadObject = None
flgCacheThreadStarted = False
atcObject = None

class gdaAttack:
    """Manages a GDA Attack

       WARNING: this code is fragile, and can fail ungracefully, or
       just hang."""

    def __init__(self, params):
        """ Everything gets set up with 'gdaAttack(params)'

            params is a dictionary containing the following
            required parameters: <br/>
            `param['name']`: The name of the attack. Make it unique, because
            the cache is discovered using this name. <br/>
            `param['rawDb']`: The label for the DB to be used as the
            raw (non-anonymized) DB. <br/>
            Following are the optional parameters: <br/>
            `param['criteria']`: The criteria by which the attack should
            determined to succeed or fail. Must be one of 'singlingOut',
            'inference', or 'linkability'. Default is 'singlingOut'. <br/>
            `param['anonDb']`: The label for the DB to be used as the
            anonymized DB. (Is automatically set to `param['rawDb']` if
            not set.) <br/>
            `param['pubDb']`: The label for the DB to be used as the
            publicly known DB in linkability attacks. <br/>
            `param['table']`: The table to be attacked. Must be present
            if the DB has more than one table. <br/>
            `param['uid']`: The uid column for the table. Must be present
            if the name of the column is other than 'uid'. <br/>
            `param['flushCache']`: Set to true if you want the cache of
            query answers from a previous run flushed. The purpose of the
            cache is to save the work from an aborted attack, which can be
            substantial because attacks can have hundreds of queries. <br/>
            `param['locCacheDir']`: The directory holding the cache DBs.
            Default 'cacheDBs'. <br/>
            `param['numRawDbThreads']`: The number of parallel queries
            that can be made to the raw DB. Default 3. <br/>
            `param['numAnonDbThreads']`: The number of parallel queries
            that can be made to the anon DB. Default 3. <br/>
            `param['numPubDbThreads']`: The number of parallel queries
            that can be made to the public linkability DB. Default 3. <br/>
            `param['verbose']`: Set to True for verbose output.

            `param['dp_budget']`: An optional overall privacy budget for the attack. For use with uber_dp. Default 'None'. <br/>
        """

        #### gda-score-code version check warning ####
        process = subprocess.run([sys.executable, "-m", "pip", "list","--outdated"],stdout=subprocess.PIPE,stderr=subprocess.PIPE,universal_newlines=True)
        upgradable_pkgs = process.stdout
        if "gda-score-code" in upgradable_pkgs:
            pkgs = upgradable_pkgs.split('\n')
            potential_gdascore_pkgs = list(filter(lambda x: 'gda-score-code' in x, pkgs))
            if len(potential_gdascore_pkgs) == 1:
                gdascore_pkg = potential_gdascore_pkgs[0]
                pkg_name, curr_ver, latest_ver, ins_type = (re.sub(r'\s+', '|', gdascore_pkg)
                                                               .split('|'))
                print('\n')
                logging.warning(f'WARNING: You have {pkg_name} version {curr_ver} installed; '
                                f'however, version {latest_ver} is available.')
                logging.warning(f'You should consider upgrading via the '
                                f'"pip install --upgrade {pkg_name}" command.')
                print('\n')
        ########

        ########### added by frzmohammadali ##########
        global theCacheQueue
        global theCacheThreadObject
        global flgCacheThreadStarted
        global atcObject

        if not theCacheQueue and not theCacheThreadObject:
            theCacheQueue = queue.Queue()
            theCacheThreadObject = CacheThread(theCacheQueue, self)
            atcObject = self
            printTitle('cache thread initialized.')

        self.cacheQueue = theCacheQueue
        self.cacheThreadObject = theCacheThreadObject
        if not flgCacheThreadStarted:
            self.cacheThreadObject.start()
            flgCacheThreadStarted = True
        ##############################################

        ############## parameters and instance variables ###############
        # ------------- Class called parameters and configured parameters
        self._vb = False
        self._cr = ''  # short for criteria
        self._pp = None  # pretty printer (for debugging)
        self._sid = None # for uber_dp interface, a session ID over the attack is needed
        self._session = None # also session for the uber_dp interface
        self._colNamesTypes = []
        self._colNames = []
        self._p = dict(name='',
                  rawDb='',
                  anonDb='',
                  pubDb='',
                  criteria='singlingOut',
                  table='',
                  uid='uid',
                  flushCache=False,
                  verbose=False,
                  # following not normally set by caller, but can be
                  locCacheDir="cacheDBs",
                  numRawDbThreads=3,
                  numAnonDbThreads=3,
                  numPubDbThreads=3,
                  )
        self._requiredParams = ['name', 'rawDb']

        # ---------- Private internal state
        # Threads
        self._rawThreads = []
        self._anonThreads = []
        self._pubThreads = []
        # Queues read by database threads _rawThreads and _anonThreads
        self._rawQ = None
        self._anonQ = None
        self._pubQ = None
        # Queues read by various caller functions
        self._exploreQ = None
        self._knowledgeQ = None
        self._attackQ = None
        self._claimQ = None
        self._guessQ = None
        # ask/get counters for setting 'stillToCome'
        self._exploreCounter = 0
        self._knowledgeCounter = 0
        self._attackCounter = 0
        self._claimCounter = 0
        self._guessCounter = 0
        # State for duplicate claim detection
        self._dupCheck = DupCheck()
        # State for computing attack results (see _initAtkRes())
        self._atrs = {}
        # State for various operational measures (see _initOp())
        self._op = {}
        ##############################################

        if self._vb:
            print(f"Calling {__name__}.init")
        if self._vb:
            print(f"   {params}")
        self._initOp()
        self._initCounters()
        self._assignGlobalParams(params)
        self._doParamChecks()
        for param in self._requiredParams:
            if len(self._p[param]) == 0:
                s = str(f"Error: Need param '{param}' in class parameters")
                sys.exit(s)

        # extract the type of interface we are interacting with the anonymization
        self._type = self._p['anonDb']['type']
        if self._type == 'uber_dp':
            # cannot run attack on uber dp without specifying the budget
            if self._p['dp_budget'] is None:
                s = str(f"Error: Needs param dp_budget in class parameters when running uber_dp attacks")
                sys.exit(s)

            # Assign the privacy budget as a parameter to the attack
            self._remaining_dp_budget = self._p['dp_budget']
            self._initUberDPSession()

            # if no session id was set, the attacks cannot be conducted
            if self._sid is None:
                s = str(f"Failed initializing session with Uber_DP Server")
                sys.exit(s)
        # create the database directory if it doesn't exist
        try:
            if not os.path.exists(self._p['locCacheDir']):
                os.makedirs(self._p['locCacheDir'])
        except OSError:
            sys.exit("Error: Creating directory. " + self._p['locCacheDir'])

        # Get the table name if not provided by the caller
        if len(self._p['table']) == 0:
            tables = self.getTableNames()
            if len(tables) != 1:
                print("Error: gdaAttack(): Must include table name if " +
                      "there is more than one table in database")
                sys.exit()
            self._p['table'] = tables[0]

        # Get the column names for computing susceptibility later
        self._colNamesTypes = self.getColNamesAndTypes()
        if self._vb:
            print(f"Columns are '{self._colNamesTypes}'")
        self._initAtkRes()
        # And make a convenient list of column names
        for colNameType in self._colNamesTypes:
            self._colNames.append(colNameType[0])

        # Setup the database which holds already executed queries so we
        # don't have to repeat them if we are restarting
        self._setupLocalCacheDB()
        # Setup the threads and queues
        self._setupThreadsAndQueues()
        numThreads = threading.active_count()
        expectedThreads = (self._p['numRawDbThreads'] +
                           self._p['numAnonDbThreads'] + 1)
        if len(self._p['pubDb']) > 0:
            expectedThreads += self._p['numPubDbThreads']
        if numThreads < expectedThreads:
            print(f"Error: Some thread(s) died "
                  f"(count {numThreads}, expected {expectedThreads}). "
                  f"Aborting.")
            self.cleanUp(cleanUpCache=False, doExit=True)

    def getResults(self):
        """ Returns all of the compiled attack results.

            This can be input to class `gdaScores()` and method
            `gdaScores.addResult()`."""
        # Add the operational parameters
        self._atrs['operational'] = self.getOpParameters()
        self._cleanPasswords()
        return self._atrs

    def getOpParameters(self):
        """ Returns a variety of performance measurements.

            Useful for debugging."""
        self._op['avQueryDuration'] = 0
        if self._op['numQueries'] > 0:
            self._op['avQueryDuration'] = (
                    self._op['timeQueries'] / self._op['numQueries'])
        self._op['avCachePutDuration'] = 0
        if self._op['numCachePuts'] > 0:
            self._op['avCachePutDuration'] = (
                    self._op['timeCachePuts'] / self._op['numCachePuts'])
        self._op['avCacheGetDuration'] = 0
        if self._op['numCacheGets'] > 0:
            self._op['avCacheGetDuration'] = (
                    self._op['timeCacheGets'] / self._op['numCacheGets'])
        return self._op

    def setVerbose(self):
        """Sets Verbose to True"""
        self._vb = True

    def unsetVerbose(self):
        """Sets Verbose to False"""
        self._vb = False

    def cleanUp(self, cleanUpCache=True, doExit=False,
                exitMsg="Finished cleanUp, exiting"):
        """ Garbage collect queues, threads, and cache.

            By default, this wipes the cache. The idea being that if the
            entire attack finished successfully, then it won't be
            repeated and the cache isn't needed. Do `cleanUpCache=False`
            if that isn't what you want."""
        if self._vb: print(f"Calling {__name__}.cleanUp")
        if self._rawQ.empty() != True:
            logging.warning("Warning, trying to clean up when raw queue not empty!")
        if self._anonQ.empty() != True:
            logging.warning("Warning, trying to clean up when anon queue not empty!")
        if self.cacheQueue.empty() != True:
            logging.warning("Warning, trying to clean up when cache queue not empty!")
        # Stuff in end signals for the workers (this is a bit bogus, cause
        # if a thread is gone or hanging, not all signals will get read)
        for i in range(self._p['numRawDbThreads']):
            self._rawQ.put(None)
        for i in range(self._p['numAnonDbThreads']):
            self._anonQ.put(None)

        for i in range(self.cacheQueue.qsize()):
            self.cacheQueue.put(None)

        cleanBgThreads()

        if len(self._p['pubDb']) > 0:
            if self._pubQ.empty() != True:
                print("Warning, trying to clean up when pub queue not empty!")
            for i in range(self._p['numPubDbThreads']):
                self._pubQ.put(None)
            for t in self._pubThreads:
                if t.isAlive(): t.stop() # t.join()
        if cleanUpCache:
            self._removeLocalCacheDB()
        if self._session: # close the uber session
            self._session.close()
        if doExit:
            sys.exit(exitMsg)

    def isClaimed(self, spec):
        """Check if a claim was already fully or partially made.

        The `spec` is formatted identical to the `spec` in `gdaAttack.askClaim`."""
        return self._dupCheck.is_claimed(spec, verbose=self._vb)

    def askClaim(self, spec, cache=True, claim=True):
        """Generate Claim query for raw and optionally pub databases.

        Before anything happens, the system uses the `gdaAttack.isClaimed`
        method to determine whether a previous claim fully or partially
        matches the new claim. Such duplicates are not allowed and an error
        will be raised providing additional details about the duplicate.

        Making a claim results in a query to the raw database, and if
        linkability attack, the pub database, to check
        the correctness of the claim. Multiple calls to this method will
        cause the corresponding queries to be queued up, so `askClaim()`
        returns immediately. `getClaim()` harvests one claim result. <br/>
        Set `claim=False` if this claim should not be applied to the
        confidence improvement score. In this case, the probability score
        will instead be reduced accordingly. <br/>
        The `spec` is formatted as follows: <br/>

            {'known':[{'col':'colName','val':'value'},...],
              'guess':[{'col':'colName','val':'value'},...],
            }

        `spec['known']` are the columns and values the attacker already knows
        (i.e. with prior knowledge). Optional. <br/>
        `spec['guess']` are the columns and values the attacker doesn't know,
        but rather is trying to predict. Mandatory for 'singling out'
        and 'inference'. Optional for 'linkabiblity' <br/>
        Answers are cached <br/>
        Returns immediately"""
        if self._vb: print(f"Calling {__name__}.askClaim with spec '{spec}', count {self._claimCounter}")
        if not self._dupCheck.is_claimed(spec, verbose=self._vb, raise_true=True):
            self._dupCheck.claim(spec, verbose=self._vb)
        self._claimCounter += 1
        sql = self._makeSqlFromSpec(spec)
        if self._vb: print(f"Sql is '{sql}'")
        sqlConfs = self._makeSqlConfFromSpec(spec)
        if self._vb: print(f"SqlConf is '{sqlConfs}'")
        # Make a copy of the query for passing around
        job = {}
        job['q'] = self._claimQ
        job['claim'] = claim
        job['queries'] = [{'sql': sql, 'cache': cache}]
        job['spec'] = spec
        for sqlConf in sqlConfs:
            job['queries'].append({'sql': sqlConf, 'cache': cache})
        self._rawQ.put(job)

    def getClaim(self):
        """ Wait for and gather results of askClaim() calls

            Returns a data structure that contains both the result
            of one finished claim, and the claim's input parameters.
            Note that the order in which results are returned by
            `getClaim()` are not necessarily the same order they were
            inserted by `askClaim()`. <br/>
            Assuming `result` is returned: <br/>
            `result['claim']` is the value supplied in the corresponding
            `askClaim()` call <br/>
            `result['spec']` is a copy of the `spec` supplied in the
            corresponding `askClaim()` call. <br/>
            `result['queries']` is a list of the queries generated in order to
            validate the claim. <br/>
            `result['answers']` are the answers to the queries in
            `result['queries']`. <br/>
            `result['claimResult']` is 'Correct' or 'Incorrect', depending
            on whether the claim satisfies the critieria or not. <br/>
            `result['stillToCome']` is a counter showing how many more
            claims are still queued. When `stillToCome` is 0, then all
            claims submitted by `askClaim()` have been returned."""

        if self._vb:
            print(f"Calling {__name__}.getClaim")
        if self._claimCounter == 0:
            # Caller shouldn't be calling if there are no expected
            # answers, but is anyway, so just return
            return {'query': {'sql': 'None'}, 'error': 'Nothing to do',
                    'stillToCome': 0, 'claimResult': 'Error'}
        job = self._claimQ.get()
        claim = job['claim']
        self._claimQ.task_done()
        self._claimCounter -= 1
        job['stillToCome'] = self._claimCounter
        self._addToAtkRes('claimTrials', job['spec'], 1)
        # The claim is tested against the first reply
        reply = job['replies'][0]
        job['claimResult'] = 'Wrong'
        if claim:
            self._addToAtkRes('claimMade', job['spec'], 1)
        if 'error' in reply:
            self._addToAtkRes('claimError', job['spec'], 1)
            job['claimResult'] = 'Error'
        else:
            if self._cr == 'singlingOut':
                claimIsCorrect = self._checkSinglingOut(reply['answer'])
            elif self._cr == 'inference':
                claimIsCorrect = self._checkInference(reply['answer'])
            elif self._cr == 'linkability':
                claimIsCorrect = self._checkLinkability(reply['answer'])
            if claim == 1 and claimIsCorrect:
                self._addToAtkRes('claimCorrect', job['spec'], 1)
                job['claimResult'] = 'Correct'
            elif claim == 0 and claimIsCorrect:
                self._addToAtkRes('claimPassCorrect', job['spec'], 1)
                job['claimResult'] = 'Correct'
        if self._cr == 'singlingOut' or self._cr == 'inference':
            # Then measure confidence against the second and third replies
            if 'answer' in job['replies'][1]:
                if job['replies'][1]['answer']:
                    guessedRows = job['replies'][1]['answer'][0][0]
                else:
                    guessedRows = 0
            elif 'error' in job['replies'][1]:
                self._pp.pprint(job)
                print(f"Error: conf query:\n{job['replies'][1]['error']}")
                self.cleanUp(cleanUpCache=False, doExit=True)
            if 'answer' in job['replies'][2]:
                if job['replies'][2]['answer']:
                    totalRows = job['replies'][2]['answer'][0][0]
                else:
                    totalRows = 0
            elif 'error' in job['replies'][2]:
                self._pp.pprint(job)
                print(f"Error: conf query:\n{job['replies'][2]['error']}")
                self.cleanUp(cleanUpCache=False, doExit=True)
            if totalRows:
                self._addToAtkRes('sumConfidenceRatios', job['spec'],
                                  guessedRows / totalRows)
                self._addToAtkRes('numConfidenceRatios', job['spec'], 1)
                self._atrs['tableStats']['totalRows'] = totalRows
        else:
            # For linkability, the confidence is always 1/2
            self._addToAtkRes('sumConfidenceRatios', job['spec'], 0.5)
            self._addToAtkRes('numConfidenceRatios', job['spec'], 1)
        if 'q' in job:
            del job['q']
        return (job)

    def askAttack(self, query, cache=True):
        """ Generate and queue up an attack query for database.

            `query` is a dictionary with (currently) one value: <br/>
            `query['sql']` contains the SQL query. <br/>
            `query['epsilon']` is optional, and defines how much of the differential privacy budget is used for uber_dp <br/>
        """
        self._attackCounter += 1
        if self._vb: print(f"Calling {__name__}.askAttack with query '{query}', count {self._attackCounter}")
        # Make a copy of the query for passing around
        qCopy = copy.copy(query)
        job = {}
        job['q'] = self._attackQ
        qCopy['cache'] = cache
        job['queries'] = [qCopy]
        self._anonQ.put(job)

    def getAttack(self):
        """ Returns the result of one askAttack() call

            Blocks until the result is available. Note that the order
            in which results are received is not necesarily the order
            in which `askAttack()` calls were made. <br/>
            Assuming `result` is returned: <br/>
            `result['answer']` is the answer returned by the DB. The
            format is: <br/>
                `[(C1,C2...,Cn),(C1,C2...,Cn), ... (C1,C2...,Cn)]` <br/>
            where C1 is the first element of the `SELECT`, C2 the second
            element, etc. This attribute does not exist in cases of query
            error (i.e. bad sql, budget exceeded if uber_dp, etc.) <br/>
            `result['cells']` is the number of cells returned in the answer
            (used by `gdaAttack()` to compute total attack cells) <br/>
            `result['query']['sql']` is the query from the corresponding
            `askAttack()`.
            `result['error']` contains the error description <br/>
            `result['remaining_dp_budget']` contains the remaining differential
            privacy budget when uber_dp is used. <br/>
            """

        if self._vb:
            print(f"Calling {__name__}.getAttack")
        if self._attackCounter == 0:
            # Caller shouldn't be calling if there are no expected
            # answers, but is anyway, so just return
            return {'query': {'sql': 'None'}, 'error': 'Nothing to do',
                    'stillToCome': 0}
        job = self._attackQ.get()
        self._attackQ.task_done()
        self._attackCounter -= 1
        reply = job['replies'][0]
        reply['stillToCome'] = self._attackCounter
        self._atrs['base']['attackGets'] += 1
        if 'cells' in reply:
            if reply['cells'] == 0:
                self._atrs['base']['attackCells'] += 1
            else:
                self._atrs['base']['attackCells'] += reply['cells']
        else:
            self._atrs['base']['attackCells'] += 1

        if self._type == 'uber_dp':
            reply['remaining_dp_budget'] = self._remaining_dp_budget
        return (reply)

    def askKnowledge(self, query, cache=True):
        """ Generate and queue up a prior knowledge query for database

            The class keeps track of how many prior knowledge cells were
            returned and uses this to compute a score. <br/>
            Input parameters formatted the same as with `askAttack()`"""

        self._knowledgeCounter += 1
        if self._vb: print(f"Calling {__name__}.askKnowledge with query "
                           f"'{query}', count {self._knowledgeCounter}")
        # Make a copy of the query for passing around
        qCopy = copy.copy(query)
        job = {}
        job['q'] = self._knowledgeQ
        qCopy['cache'] = cache
        job['queries'] = [qCopy]
        self._rawQ.put(job)

    def getKnowledge(self):
        """ Wait for and gather results of prior askKnowledge() calls

            Blocks until the result is available. Note that the order
            in which results are received is not necesarily the order
            in which `askKnowledge()` calls were made. <br/>
            Return parameter formatted the same as with `getAttack()`"""

        if self._vb:
            print(f"Calling {__name__}.getKnowledge")
        if self._knowledgeCounter == 0:
            # Caller shouldn't be calling if there are no expected
            # answers, but is anyway, so just return
            return {'query': {'sql': 'None'}, 'error': 'Nothing to do',
                    'stillToCome': 0}
        job = self._knowledgeQ.get()
        self._knowledgeQ.task_done()
        self._knowledgeCounter -= 1
        reply = job['replies'][0]
        reply['stillToCome'] = self._knowledgeCounter
        self._atrs['base']['knowledgeGets'] += 1
        if 'cells' in reply:
            self._atrs['base']['knowledgeCells'] += reply['cells']
        return (reply)

    def askExplore(self, query, cache=True):
        """ Generate and queue up an exploritory query for database

            No score book-keeping is done here. An analyst may make
            any number of queries without impacting the GDA score. <br/>
            `query` is a dictionary with two values: <br/>
            `query['sql']` contains the SQL query. <br/>
            `query['db']` determines which database is queried, and
            is one of 'rawDb', 'anonDb', or (if linkability), 'pubDb'."""

        self._exploreCounter += 1
        if self._vb: print(f"Calling {__name__}.askExplore with "
                           f"query '{query}', count {self._exploreCounter}")
        # Make a copy of the query for passing around
        qCopy = copy.copy(query)
        job = {}
        job['q'] = self._exploreQ
        qCopy['cache'] = cache
        job['queries'] = [qCopy]
        if qCopy['db'] == 'rawDb' or qCopy['db'] == 'raw':
            self._rawQ.put(job)
        elif qCopy['db'] == 'anonDb' or qCopy['db'] == 'anon':
            self._anonQ.put(job)
        else:
            self._pubQ.put(job)

    def getExplore(self):
        """ Wait for and gather results of prior askExplore() calls.

            Blocks until the result is available. Note that the order
            in which results are received is not necesarily the order
            in which `askExplore()` calls were made. <br/>
            Return parameter formatted the same as with `getAttack()`"""
        if self._vb:
            print(f"Calling {__name__}.getExplore")
        if self._exploreCounter == 0:
            # Caller shouldn't be calling if there are no expected
            # answers, but is anyway, so just return
            return {'query': {'sql': 'None'}, 'error': 'Nothing to do',
                    'stillToCome': 0}
        job = self._exploreQ.get()
        self._exploreQ.task_done()
        self._exploreCounter -= 1
        reply = job['replies'][0]
        reply['stillToCome'] = self._exploreCounter
        return (reply)

    def getPublicColValues(self, colName, tableName=''):
        """Return list of "publicly known" column values and counts

        Column value has index 0, count of distinct UIDs has index 1
        Must specify column name.
        """
        if len(colName) == 0:
            print(f"Must specify column 'colName'")
            return None

        if len(tableName) == 0:
            # caller didn't supply a table name, so get it from the
            # class init
            tableName = self._p['table']

        # Establish connection to database
        db = getDatabaseInfo(self._p['rawDb'])
        connStr = str(
            f"host={db['host']} port={db['port']} dbname={db['dbname']} user={db['user']} password={db['password']}")
        conn = psycopg2.connect(connStr)
        cur = conn.cursor()
        # First we need to know the total number of distinct users
        sql = str(f"""select count(distinct {self._p['uid']})
                      from {tableName}""")
        try:
            cur.execute(sql)
        except psycopg2.Error as e:
            print(f"Error: getPublicColValues() query: '{e}'")
            self.cleanUp(cleanUpCache=False, doExit=True)
        ans = cur.fetchall()
        numUid = ans[0][0]
        # Query the raw db for values in the column
        sql = str(f"""select {colName}, count(distinct {self._p['uid']})
                      from {tableName}
                      group by 1
                      order by 2 desc
                      limit 200""")
        try:
            cur.execute(sql)
        except psycopg2.Error as e:
            print(f"Error: getPublicColValues() query: '{e}'")
            self.cleanUp(cleanUpCache=False, doExit=True)
        ans = cur.fetchall()
        ret = []
        for row in ans:
            # row[0] is the value, row[1] is the count
            if (((row[1] / numUid) > 0.002) and
                    (row[1] >= 50)):
                ret.append((row[0], row[1]))
        conn.close()
        return ret

    def getColNames(self, dbType='rawDb', tableName=''):
        """Return simple list of column names

        `dbType` is one of 'rawDb' or 'anonDb'"""

        if len(tableName) == 0:
            colsAndTypes = self.getColNamesAndTypes(dbType=dbType)
        else:
            colsAndTypes = self.getColNamesAndTypes(
                dbType=dbType, tableName=tableName)
        if not colsAndTypes:
            return None
        cols = []
        for tup in colsAndTypes:
            cols.append(tup[0])
        return cols

    def getAttackTableName(self):
        """Returns the name of the table being used in the attack."""
        return self._p['table']

    def getTableCharacteristics(self, tableName=''):
        """Returns the full contents of the table characteristics

           Return value is a dict indexed by column name: <br/>

               { '<colName>':
                   {
                       'av_rows_per_vals': 3.93149,
                       'av_uids_per_val': 0.468698,
                       'column_label': 'continuous',
                       'column_name': 'dropoff_latitude',
                       'column_type': 'real',
                       'max': '898.29382000000000',
                       'min': '-0.56333297000000',
                       'num_distinct_vals': 24216,
                       'num_rows': 95205,
                       'num_uids': 11350,
                       'std_rows_per_val': 10.8547,
                       'std_uids_per_val': 4.09688},
                   }
               }

        """
        if len(tableName) == 0:
            # caller didn't supply a table name, so get it from the
            # class init
            tableName = self._p['table']

        # Modify table name to the default for the characteristics table
        tableName += '_char'

        # Establish connection to database
        db = getDatabaseInfo(self._p['rawDb'])
        connStr = str(
            f"host={db['host']} port={db['port']} dbname={db['dbname']} user={db['user']} password={db['password']}")
        conn = psycopg2.connect(connStr)
        cur = conn.cursor()
        # Set up return dict
        ret = {}
        # Query it for column names
        sql = str(f"""select column_name, data_type 
                  from information_schema.columns where
                  table_name='{tableName}'""")
        try:
            cur.execute(sql)
        except psycopg2.Error as e:
            print(f"Error: getTableCharacteristics() query: '{e}'")
            self.cleanUp(cleanUpCache=False, doExit=True)
        cols = cur.fetchall()
        # Make index for column name (should be 0, but just to be sure)
        for colNameIndex in range(len(cols)):
            if cols[colNameIndex][0] == 'column_name':
                break

        # Query it for table contents
        sql = str(f"SELECT * FROM {tableName}")
        try:
            cur.execute(sql)
        except psycopg2.Error as e:
            print(f"Error: getTableCharacteristics() query: '{e}'")
            self.cleanUp(cleanUpCache=False, doExit=True)
        ans = cur.fetchall()
        for row in ans:
            colName = row[colNameIndex]
            ret[colName] = {}
            for i in range(len(row)):
                ret[colName][cols[i][0]] = row[i]
        conn.close()
        return ret

    def getAnonTableCharacteristics(self, tableName=''):
        """Returns the full contents of the table characteristics

           Return value is a dict indexed by column name: <br/>

               { '<colName>':
                   {
                       'av_rows_per_vals': 3.93149,
                       'av_uids_per_val': 0.468698,
                       'column_label': 'continuous',
                       'column_name': 'dropoff_latitude',
                       'column_type': 'real',
                       'max': '898.29382000000000',
                       'min': '-0.56333297000000',
                       'num_distinct_vals': 24216,
                       'num_rows': 95205,
                       'num_uids': 11350,
                       'std_rows_per_val': 10.8547,
                       'std_uids_per_val': 4.09688},
                   }
               }

        """
        if len(tableName) == 0:
            # caller didn't supply a table name, so get it from the
            # class init
            tableName = self._p['table']

        # Modify table name to the default for the characteristics table
        # tableName += '_char'

        # Establish connection to database
        db = getDatabaseInfo(self._p['anonDb'])
        connStr = str(
            f"host={db['host']} port={db['port']} dbname={db['dbname']} user={db['user']} password={db['password']}")
        conn = psycopg2.connect(connStr)
        cur = conn.cursor()

        # Query it for column names
        sql = str(f"""select column_name, data_type 
                  from information_schema.columns
                  where table_schema NOT IN ('information_schema', 'pg_catalog') and 
                  table_name='{tableName}'""")
        try:
            cur.execute(sql)
        except psycopg2.Error as e:
            print(f"Error: getAnonTableCharacteristics() query: '{e}'")
            self.cleanUp(cleanUpCache=False, doExit=True)
        ans = cur.fetchall()

        # Set up return dict
        ret = {_row[0]: {'column_name': _row[0], 'column_type': _row[1]} for _row in ans}

        conn.close()
        return ret

    # Note that following is used internally, but we expose it to the
    # caller as well because it is a useful function for exploration
    def getColNamesAndTypes(self, dbType='rawDb', tableName=''):
        """Return raw database column names and types (or None if error)

        dbType is one of 'rawDb' or 'anonDb' <br/>
        return format: [(col,type),(col,type),...]"""
        if len(tableName) == 0:
            # caller didn't supply a table name, so get it from the
            # class init
            tableName = self._p['table']

        # Establish connection to database
        db = getDatabaseInfo(self._p[dbType])
        if db['type'] != 'postgres' and db['type'] != 'aircloak':
            print(f"DB type '{db['type']}' must be 'postgres' or 'aircloak'")
            return None
        connStr = str(
            f"host={db['host']} port={db['port']} dbname={db['dbname']} user={db['user']} password={db['password']}")
        conn = psycopg2.connect(connStr)
        cur = conn.cursor()
        # Query it for column names
        if db['type'] == 'postgres':
            sql = str(f"""select column_name, data_type 
                      from information_schema.columns where
                      table_name='{tableName}'""")
        elif db['type'] == 'aircloak':
            sql = str(f"show columns from {tableName}")
        try:
            cur.execute(sql)
        except psycopg2.Error as e:
            print(f"Error: getColNamesAndTypes() query: '{e}'")
            self.cleanUp(cleanUpCache=False, doExit=True)
        ans = cur.fetchall()
        ret = []
        for row in ans:
            ret.append((row[0], row[1]))
        conn.close()
        return ret

    def getTableNames(self, dbType='rawDb'):
        """Return database table names

        dbType is one of 'rawDb' or 'anonDb' <br/>
        Table names returned as list, unless error then return None"""

        # Establish connection to database
        db = getDatabaseInfo(self._p[dbType])
        if db['type'] != 'postgres' and db['type'] != 'aircloak':
            print(f"DB type '{db['type']}' must be 'postgres' or 'aircloak'")
            return None
        connStr = str(
            f"host={db['host']} port={db['port']} dbname={db['dbname']} user={db['user']} password={db['password']}")
        conn = psycopg2.connect(connStr)
        cur = conn.cursor()
        # Query it for column names
        if db['type'] == 'postgres':
            sql = """SELECT tablename
                     FROM pg_catalog.pg_tables
                     WHERE schemaname != 'pg_catalog' AND
                           schemaname != 'information_schema'"""
        elif db['type'] == 'aircloak':
            sql = "show tables"
        try:
            cur.execute(sql)
        except psycopg2.Error as e:
            print(f"Error: getTableNames() query: '{e}'")
            self.cleanUp(cleanUpCache=False, doExit=True)
        ans = cur.fetchall()
        ret = []
        for row in ans:
            ret.append(row[0])
        conn.close()
        return ret

    def getUidColName(self):
        """ Returns the name of the UID column"""
        return self._p['uid']

    def getPriorKnowledge(self, dataColumns, method,
            fraction=None, count=None, selectColumn=None, colRange=[None,None], values=[None]):
        """ Returns data from the rawDB according to a specification

        This mimics external knowledge that an attacker may have about the data, and
        influences the 'knowledge' part of the GDA Score. <br/>
            `dataColumns` is a list of column names. The data for these columns is returned <br/>
            `method` can be 'rows' or 'users'. If 'rows', then rows are selected
            according to the criteria (`fraction`, `count`, `selectColumn`, `colRange`,
            or `values`).
            If 'users', then all rows for a set of selected users is returned.
            The users are selected according to the criteria (`fraction` or `count`) <br/>
            If none of the criteria are set, or if `fraction` is set to 1.0, then all
            rows are returned (for the selected column values) One of `fraction`, `count`,
            or `selectColumn` must be set. <br/>
            `fraction` or `count` are set to obtain a random set of rows or users. If
            `fraction`, then an approximate fraction of all rows/users is selected.
            `fraction` is a value between 0 and 1.0. If `count`, then exactly `count`
            random rows/users are selected. <br/>
            `selectColumn` is set to select rows according to the values of the specified
            column. `selectColumn` is a column name. If set, then either a range of
            values (`colRange`), or a set of values (`values`) must be chosen. <br/>
            `colRange` is
            a list with two values: `[min,max]`. This selects all values
            between min and max inclusive. <br/>
            `values` is a list
            of one or more values of any type. This selects all values matching those in
            the list. <br/>
            The return value is a list in this format: <br/>
                `[(C1,C2...,Cn),(C1,C2...,Cn), ... (C1,C2...,Cn)]` <br/>
            where C1 corresponds to the first column in `dataColumns`, C2 corresponds to
            the second column in `dataColumns`, and so on.  <br/>
        """
        # Check input parameters
        if not isinstance(dataColumns, list):
            print(f"getPriorKnowledge Error: dataColumns must be a list of one or more column names")
            self.cleanUp(cleanUpCache=False, doExit=True)
        if method not in ['rows','users']:
            print(f"getPriorKnowledge Error: method must be 'rows' or 'users'")
            self.cleanUp(cleanUpCache=False, doExit=True)
        if fraction is None and count is None and selectColumn is None:
            print(f"getPriorKnowledge Error: one of fraction, count, or selectColumn must be set")
            self.cleanUp(cleanUpCache=False, doExit=True)
        if fraction and not isinstance(fraction, float):
            print(f"getPriorKnowledge Error: if set, fraction must be a float")
            self.cleanUp(cleanUpCache=False, doExit=True)
        if (fraction and (count or selectColumn)) or (count and (fraction or selectColumn)):
            print(f"getPriorKnowledge Error: only one of fraction, count, or selectColumn may be set")
            self.cleanUp(cleanUpCache=False, doExit=True)
        if count and not isinstance(count, int):
            print(f"getPriorKnowledge Error: if set, count must be an integer")
            self.cleanUp(cleanUpCache=False, doExit=True)
        if selectColumn:
            if selectColumn not in self._colNames:
                print(f"getPriorKnowledge Error: selectColumn '{selectColumn}' is not a valid column")
                self.cleanUp(cleanUpCache=False, doExit=True)
            if colRange == [None,None] and values == [None]:
                print(f"getPriorKnowledge Error: if selectColumn is set, one of colRange or values must be set")
                self.cleanUp(cleanUpCache=False, doExit=True)
            if not isinstance(colRange, list):
                print(f"getPriorKnowledge Error: colRange must be a list with two values")
                self.cleanUp(cleanUpCache=False, doExit=True)
            if not (isinstance(values, list) or isinstance(values, tuple)) or len(values) == 0:
                print(f"getPriorKnowledge Error: values must be a list or tuple with one or more values")
                self.cleanUp(cleanUpCache=False, doExit=True)
        for col in dataColumns:
            if col not in self._colNames:
                print(f"getPriorKnowledge Error: column '{col}' is not a valid column")
                self.cleanUp(cleanUpCache=False, doExit=True)
        # Basic input checks finished
        # Establish connection to database
        db = getDatabaseInfo(self._p['rawDb'])
        connStr = str(
            f"host={db['host']} port={db['port']} dbname={db['dbname']} user={db['user']} password={db['password']}")
        conn = psycopg2.connect(connStr)
        cur = conn.cursor()
        table = self._p['table']
        uid = self._p['uid']
        # Make the SELECT part of the SQL query
        initSql = 'SELECT '
        for col in dataColumns:
            initSql += str(f"{col}, ")
        initSql = initSql[0:-2]
        if method == 'rows' and fraction:
            sql = initSql + str(f" FROM {table} WHERE random() <= {fraction}")
            ans = self._doQuery(cur,sql)
            self._atrs['base']['knowledgeCells'] += len(dataColumns) * len(ans)
            return(ans)
        if method == 'users' and fraction:
            sql = initSql + str(f" FROM {table} WHERE {uid} IN ")
            sql += str(f"(SELECT {uid} from (SELECT DISTINCT {uid} FROM {table}) t WHERE random() < {fraction})")
            ans = self._doQuery(cur,sql)
            self._atrs['base']['knowledgeCells'] += len(dataColumns) * len(ans)
            return(ans)
        if method == 'rows' and colRange[0] is not None:
            for pair in self._colNamesTypes:
                if selectColumn in pair[0]:
                    colType = pair[1]
                    break
            if 'text' in colType or 'char' in colType or 'date' in colType or 'time' in colType:
                sql = initSql + str(f" FROM {table} WHERE {selectColumn} >= '{colRange[0]}' and {selectColumn} <= '{colRange[1]}'")
            else:
                sql = initSql + str(f" FROM {table} WHERE {selectColumn} >= {colRange[0]} and {selectColumn} <= {colRange[1]}")
            ans = self._doQuery(cur,sql)
            self._atrs['base']['knowledgeCells'] += len(dataColumns) * len(ans)
            return(ans)
        if method == 'rows' and values[0] is not None:
            sql = initSql + str(f" FROM {table} WHERE {selectColumn} IN (")
            for pair in self._colNamesTypes:
                if selectColumn in pair[0]:
                    colType = pair[1]
                    break
            for value in values:
                if "text" in colType or "date" in colType or "time" in colType:
                    sql += str(f"'{value}', ")
                else:
                    sql += str(f"{value}, ")
            sql = sql[0:-2]
            sql += ")"
            ans = self._doQuery(cur,sql)
            self._atrs['base']['knowledgeCells'] += len(dataColumns) * len(ans)
            return(ans)
        if method == 'rows' and count:
            # need to know the total number of rows
            sql = str(f"select count(*) from {table}")
            ans = self._doQuery(cur,sql)
            numRows = ans[0][0]
            # next we get some random set of rows that is certainly more than we need
            frac = (count/numRows)*2
            sql = initSql + str(f" FROM {table} WHERE random() <= {frac}")
            temp = self._doQuery(cur,sql)
            # next we scramble these so that we get a random sampling from the random sampling
            random.shuffle(temp)
            # finally pick the exact count
            ans = temp[0:count]
            self._atrs['base']['knowledgeCells'] += len(dataColumns) * len(ans)
            return(ans)
        if method == 'users' and count:
            # get the full list of distinct UIDs
            sql = str(f"SELECT DISTINCT {uid} from {table}")
            uidList = self._doQuery(cur,sql)
            # next we scramble these so that we can get a random sampling
            random.shuffle(uidList)
            # pick the exact count of UIDs
            uidList = uidList[0:count]
            sql = initSql + str(f" FROM {table} WHERE {uid} IN (")
            for pair in self._colNamesTypes:
                if uid in pair[0]:
                    colType = pair[1]
                    break
            for uidVal in uidList:
                if "text" in colType or "date" in colType or "time" in colType:
                    sql += str(f"'{uidVal[0]}', ")
                else:
                    sql += str(f"{uidVal[0]}, ")
            sql = sql[0:-2]
            sql += ")"
            ans = self._doQuery(cur,sql)
            self._atrs['base']['knowledgeCells'] += len(dataColumns) * len(ans)
            return(ans)
        #zzzz
        return None
    #def getPriorKnowledge(self, dataColumns, method,
            #fraction=None, count=None, selectColumn=None, colRange=[None,None], values=[None]):

    # -------------- Private Methods -------------------
    def _doQuery(self,cur,sql):
        try:
            cur.execute(sql)
        except psycopg2.Error as e:
            print(f"Error: getPublicColValues() query: '{e}'")
            self.cleanUp(cleanUpCache=False, doExit=True)
        ans = cur.fetchall()
        return ans

    def _cleanPasswords(self):
        if 'attack' in self._atrs:
            if ('anonDb' in self._atrs['attack'] and
                    'password' in self._atrs['attack']['anonDb']):
                self._atrs['attack']['anonDb']['password'] = 'xxxxxxx'
            if ('rawDb' in self._atrs['attack'] and
                    'password' in self._atrs['attack']['rawDb']):
                self._atrs['attack']['rawDb']['password'] = 'xxxxxxx'
            if ('pubDb' in self._atrs['attack'] and
                    'password' in self._atrs['attack']['pubDb']):
                self._atrs['attack']['pubDb']['password'] = 'xxxxxxx'

        return

    def _assignGlobalParams(self, params):
        self._pp = pprint.PrettyPrinter(indent=4)
        for key, val in params.items():
            self._p[key] = val
            # assign verbose value to a smaller variable name
            if key == "verbose":
                if val != False:
                    self._vb = True
            # Check criteria
            if key == "criteria":
                if (val == 'singlingOut' or val == 'inference' or
                        val == 'linkability'):
                    self._cr = val
                else:
                    print("""Error: criteria must be one of 'singlingOut',
                             'inference', or 'linkability'""")
                    sys.exit('')

    def _setupLocalCacheDB(self):
        path = self._p['locCacheDir'] + "/" + self._p['name'] + ".db"
        conn = sqlite3.connect(path)
        cur = conn.cursor()
        if self._p['flushCache'] == True:
            sql = "DROP TABLE IF EXISTS tab"
            if self._vb: print(f"   cache DB: {sql}")
            cur.execute(sql)
        sql = """CREATE TABLE IF NOT EXISTS tab
                 (qid text primary key, answer text)"""
        if self._vb: print(f"   cache DB: {sql}")
        cur.execute(sql)
        # conn.commit()
        cur.execute("PRAGMA journal_mode=WAL;")
        # conn.commit()
        conn.close()

    def _removeLocalCacheDB(self):
        path = self._p['locCacheDir'] + "/" + self._p['name'] + ".db"
        max_attempts = 5
        attempt = 0
        removeFlag = False
        _ex = None
        if os.path.exists(path):
            while attempt <= max_attempts:
                attempt += 1
                try:
                    os.remove(path)
                    removeFlag = True
                    break
                except Exception as ex:
                    _ex = ex
                    removeFlag = False
                time.sleep(0.3)

            if not removeFlag:
                logging.error(f"cache db removing error after {attempt} attempts.\n"
                              f"ERROR: Failed to remove cache DB {path} => ex: {_ex}")
            else:
                logging.info(f"cache db removed successfully after {attempt} attempt(s).")

    def removeLocalCacheDBWrapper(self):
        return self._removeLocalCacheDB()

    def _setupThreadsAndQueues(self):
        self._anonThreads = []
        self._rawThreads = []
        self._pubThreads = []
        self._exploreQ = queue.Queue()
        self._knowledgeQ = queue.Queue()
        self._attackQ = queue.Queue()
        self._claimQ = queue.Queue()
        self._guessQ = queue.Queue()
        self._rawQ = queue.Queue()
        if self._cr == 'linkability':
            self._pubQ = queue.Queue()
        self._anonQ = queue.Queue()
        backQ = queue.Queue()
        for i in range(self._p['numRawDbThreads']):
            d = dict(db=self._p['rawDb'], q=self._rawQ,
                     kind='raw', backQ=backQ)
            t = EnhancedThread(target=self._dbWorker, kwargs=d)
            t.start()
            self._rawThreads.append(t)
        for i in range(self._p['numAnonDbThreads']):
            d = dict(db=self._p['anonDb'], q=self._anonQ,
                     kind='anon', backQ=backQ)
            t = EnhancedThread(target=self._dbWorker, kwargs=d)
            t.start()
            self._anonThreads.append(t)
        if self._cr == 'linkability':
            for i in range(self._p['numPubDbThreads']):
                d = dict(db=self._p['pubDb'], q=self._pubQ,
                         kind='pub', backQ=backQ)
                t = EnhancedThread(target=self._dbWorker, kwargs=d)
                t.start()
                self._pubThreads.append(t)
        num = (self._p['numRawDbThreads'] + self._p['numAnonDbThreads'])
        if self._cr == 'linkability':
            num += self._p['numPubDbThreads']
        # Make sure all the worker threads are ready
        for i in range(num):
            msg = backQ.get()
            if self._vb: print(f"{msg} is ready")
            backQ.task_done()

    def _dbWorker(self, db, q, kind, backQ):

        # uber dp has a different interface than aircloak or postgres
        if db['type'] == 'uber_dp':
            if self._vb: print(f"Starting {__name__}.serverWorker:{db, kind}")
            me = threading.current_thread()
            backQ.put(me)
            while True:
                jobOrig = q.get()
                q.task_done()
                if jobOrig is None:
                    if self._vb:
                        print(f"    {me}: serverWorker done {db, kind}")
                    break
                # make a copy for passing around
                job = copy.copy(jobOrig)
                replyQ = job['q']
                replies = []  # holds all the reply dicts
                for query in job['queries']:
                    reply = self._processUberQuery(query)
                    replies.append(reply)
                job['replies'] = replies
                replyQ.put(job)

        elif db['type'] == 'aircloak' or db['type'] == 'postgres':
            if self._vb: print(f"Starting {__name__}.dbWorker:{db, kind}")
            me = threading.current_thread()
            d = getDatabaseInfo(db)
            # Establish connection to database
            connStr = str(
                f"host={d['host']} port={d['port']} dbname={d['dbname']} user={d['user']} password={d['password']}")
            if self._vb: print(f"    {me}: Connect to DB with DSN '{connStr}'")
            conn = psycopg2.connect(connStr)
            cur = conn.cursor()
            # Establish connection to local cache
            path = self._p['locCacheDir'] + "/" + self._p['name'] + ".db"
            # Set timeout low so that we don't spend a lot of time inserting
            # into the cache in case it gets overloaded
            connInsert = sqlite3.connect(path, timeout=0.1)
            curInsert = connInsert.cursor()
            connRead = sqlite3.connect(path)
            curRead = connRead.cursor()
            backQ.put(me)
            while True:
                if isinstance(me, EnhancedThread) and me.stopped():
                    logging.info(f' > {me.getName()} stopped.')
                    return
                try:
                    jobOrig = q.get(block=True, timeout=3)
                except queue.Empty:
                    continue
                q.task_done()
                if jobOrig is None:
                    if self._vb: print(f"    {me}: dbWorker done {db, kind}")
                    conn.close()
                    connRead.close()
                    connInsert.close()
                    break
                # make a copy for passing around
                job = copy.copy(jobOrig)
                replyQ = job['q']
                replies = []
                for query in job['queries']:
                    reply = self._processQuery(query, conn, cur,
                                               connInsert, curInsert, curRead)
                    replies.append(reply)
                job['replies'] = replies
                replyQ.put(job)

    def _processUberQuery(self, query):
        # Once the session ID is defined, we stay in that session
        # ONLY `epsilon` and the `query` can be set
        # `budget` and `dbname` just have placeholders because they cannot be changed anyways
        request = {
            'query': query['sql'],
            'epsilon': str(query['epsilon']),
            'count' : '1', # the interface is designed in a way such that repeted attacks need to be triggered
            # by several askAttack(), getAttack(). Therefore, the server functionality to potentially execute the same
            # query several times is not used
            'budget': 'None',
            'dbname': 'None',
            'sid': self._sid
        }

        start = time.perf_counter()  # store the time of query execution

        url = self._p['anonDb']['host']
        headers = {'Content-Type': 'application/json',
                       'Accept': 'application/json'}  # Headers to be sent in the client request
        # Client stores the response sent by the simpleServer.py
        try:
            response = requests.get(url, json=request, headers=headers, timeout=100, verify=True)

            resp = response.json()  # Convert response sent by server to JSON
            if self._vb:
                print("Server response for the given query: ")
                print(resp)
            if 'Error' in resp['Server Response']:
                # If budget exceeded, we do not provide an answer field in the reply
                if 'Budget Exceeded' in resp['Server Response']['Error']:
                    print("This query does exceed the remaining privacy budget for your attack.")
                    print("Your remaining budget is "+str(self._remaining_dp_budget)+", the query would need "+str(query['epsilon'])+".")
                    reply = dict(error='Budget Exceeded')
            else:
                # if the query went through, we can deduct its privay consumption to keep track internally
                self._remaining_dp_budget -= query['epsilon']

                # the answer of dp queries is a single value (as it computes the aggregate over several query rows)
                # to match the format needed to compute number of cells, we still need two dimensions
                # therefore, [[]]
                ans = [[float((resp['Server Response']['Result']))]]# record the answer and append it as a 1-element list of float

                # for statistics. Only makes sense to count query if it went through
                self._op['numQueries'] += 1

                # after all for loops find the shape of the resulting answers
                numCells = self._computeNumCells(ans)

                # format the reply similarly as for aircloak and postgres
                reply = dict(answer=ans, cells=numCells)

        except requests.ConnectionError as e:
            print("Connection Error. Make sure you are connected to Internet.")
            print(str(e))

        except requests.Timeout as e:
            print("Timeout Error")
            print(str(e))

        except requests.RequestException as e:
            print("General Error")
            print(str(e))

        except KeyboardInterrupt:
            print("Program closed")


        reply['query'] = query

        # calculate the time we needed for the query
        end = time.perf_counter()
        duration = end - start

        self._op['timeQueries'] += duration

        return reply

    def _processQuery(self, query, conn, cur, connInsert, curInsert, curRead, queryType='db'):
        # record and remove the return queue
        # queryType specifies if we are asking the queries from a db (aircloak, postgres)
        # or from a server, like uber_dp
        if queryType == 'server':
            pass
        elif queryType == 'db':
            cache = query['cache']
            del query['cache']
            # Check the cache for the answer
            # Note that at this point query is a dict
            # containing the sql, the db (raw, anon, or pub),
            # and any tags that the source added
            cachedReply = None
            if cache:
                cachedReply = self._getCache(curRead, query)
            if cachedReply:
                if self._vb: print("    Answer from cache")
                if 'answer' in cachedReply:
                    numCells = self._computeNumCells(cachedReply['answer'])
                    cachedReply['cells'] = numCells
                return cachedReply
            else:
                start = time.perf_counter()
                try:
                    cur.execute(query['sql'])
                except psycopg2.Error as e:
                    reply = dict(error=e.pgerror)
                else:
                    ans = cur.fetchall()
                    numCells = self._computeNumCells(ans)
                    reply = dict(answer=ans, cells=numCells)
                end = time.perf_counter()
                duration = end - start
                self._op['numQueries'] += 1
                self._op['timeQueries'] += duration
                reply['query'] = query
                # only cache if the native query is slow
                if duration > 0.1:
                    # self._putCache(connInsert, curInsert, query, reply)
                    self.cacheQueue.put([connInsert, curInsert, query, reply])
            return reply

    def _checkInference(self, ans):
        # column 0 must be UID
        # User is inferred if all users in answer have same attributes
        # Returns 1 if inference claim correct, else returns 0
        if len(ans) == 0:
            return 0
        numRows = len(ans)
        numColumns = len(ans[0])
        if numColumns < 2:
            # Can't test inference unless there is at least one column
            # (other than UID) that is the same
            return 0
        for c in range(1, numColumns):
            val = ans[0][c]
            for r in range(1, numRows):
                if val != ans[r][c]:
                    return 0
        return 1

    def _checkLinkability(self, ans):
        # The test is the same as with singling out
        # Returns 1 if linkability claim correct, else returns 0
        return self._checkSinglingOut(ans)

    def _checkSinglingOut(self, ans):
        # column 0 must be UID
        # User is singled-out if there is only one distinct UID
        # Returns 1 if singling out claim correct, else returns 0
        if len(ans) == 0:
            return 0
        uids = {}
        for row in ans:
            uids[row[0]] = 1
        numUids = len(uids)
        if numUids == 1:
            return 1
        else:
            return 0

    def _computeNumCells(self, ans):
        # ans is a list of tuples [(x,y),(x,y),(x,y) ...
        # Count the number of columns (in the first row)
        if len(ans) == 0:
            return 0
        numColumns = len(ans[0])
        numRows = len(ans)
        numCells = numColumns * numRows
        return numCells

    def _doParamChecks(self):
        dbInfoRaw = getDatabaseInfo(self._p['rawDb'])
        if not dbInfoRaw:
            sys.exit('rawDb now found in database config')
        if len(self._p['anonDb']) == 0:
            self._p['anonDb'] = self._p['rawDb']
        else:
            dbInfoAnon = getDatabaseInfo(self._p['anonDb'])
            if not dbInfoAnon:
                sys.exit('anonDb not found in database config')
        if self._cr == 'linkability':
            dbInfo = getDatabaseInfo(self._p['pubDb'])
            if not dbInfo:
                sys.exit('Must specify pubDb if criteria is linkability')
        numThreads = self._p['numRawDbThreads'] + self._p['numAnonDbThreads']
        if self._cr == 'linkability':
            numThreads += self._p['numPubDbThreads']
        if numThreads > 50:
            sys.exit("Error: Can't have more than 50 threads total")

    def _getCache(self, cur, query):
        path = self._p['locCacheDir'] + "/" + self._p['name'] + ".db"
        my_conn = sqlite3.connect(path, timeout=0.1)
        my_cur = my_conn.cursor()
        # turn the query (dict) into a string
        qStr = self._dict2Str(query)
        if qStr is None:
            return None
        sql = str(f"SELECT answer FROM tab where qid = '{qStr}'")
        if self._vb: print(f"   cache DB: {sql}")
        start = time.perf_counter()
        for z in range(1,11):
            try:
                # cur.execute(sql)
                my_cur.execute(sql)
            except sqlite3.OperationalError as e:
                # database is locked
                if self._p['verbose'] or self._vb:
                    logging.warning(f'>> reading from cache DB: {z} attempt(s). Coming next try '
                                  f'soon...')
                err = e
                time.sleep(0.5)
                continue
            except (sqlite3.Error, Exception) as e:
                if self._p['verbose'] or self._vb:
                    logging.warning(f"getCache error '{e.args[0]}' attempt: {z}. Coming next try "
                                    f"soon...")
                err = e
                time.sleep(0.5)
                continue
            else:
                break
        else:
            if self._p['verbose'] or self._vb:
                logging.error(f'>> could not read from cache DB >> ERROR: {err}')
            return None
        end = time.perf_counter()
        self._op['numCacheGets'] += 1
        self._op['timeCacheGets'] += (end - start)
        # answer = cur.fetchone() # frzmohammadali just to remember my stupidest bug ever
        answer = my_cur.fetchone()
        my_cur.close()
        my_conn.close()
        if not answer:
            return None
        rtnDict = self._str2Dict(answer[0])
        return rtnDict

    def _putCache(self, conn, cur, query, reply):
        # turn the query and reply (dict) into a string
        # Establish connection to local cache
        path = self._p['locCacheDir'] + "/" + self._p['name'] + ".db"
        qStr = self._dict2Str(query)
        if qStr is None:
            return
        rStr = self._dict2Str(reply)
        if rStr is None:
            return
        sql = str(f"INSERT INTO tab VALUES ('{qStr}','{rStr}')")
        if self._vb: print(f"   cache DB: {sql}")
        start = time.perf_counter()
        err = None
        for z in range(10):
            try:
                # cur.execute(sql)
                # conn.commit()
                my_conn = sqlite3.connect(path, timeout=0.1)
                my_cur = my_conn.cursor()
                my_cur.execute(sql)
                my_conn.commit()

            except sqlite3.IntegrityError as e:
                if self._p['verbose'] or self._vb:
                    logging.warning(f"putCache error [qid exists in cached queries] '{e.args[0]}' ")
                break
            except sqlite3.OperationalError as e:
                # database is locked
                if self._p['verbose'] or self._vb:
                    logging.warning(f"putCache attempt: {z}. Coming next try "
                                    f"soon...")
                err = e
                time.sleep(0.5)
                continue
            except (sqlite3.Error, Exception) as e:
                if self._p['verbose'] or self._vb:
                    logging.warning(f"putCache error '{e.args[0]}' attempt: {z}. Coming next try "
                                    f"soon...")
                err = e
                time.sleep(0.5)
                continue
            else:
                break
            finally:
                try:
                    if my_cur:
                        my_cur.close()
                    if my_conn:
                        my_conn.close()
                except sqlite3.ProgrammingError:
                    # cursor and connection is already closed
                    pass
        else:
            # raise err
            if self._p['verbose'] or self._vb:
                logging.error(f'>> could not insert into cache DB >> ERROR: {err}')

        end = time.perf_counter()
        self._op['numCachePuts'] += 1
        self._op['timeCachePuts'] += (end - start)

    def putCacheWrapper(self, conn, cur, query, reply):
        self._putCache(conn, cur, query, reply)

    def _dict2Str(self, d):
        try:
            dStr = simplejson.dumps(d)
        except TypeError:
            print("simpleJson failed")
            return None
        dByte = str.encode(dStr)
        dByte64 = base64.b64encode(dByte)
        try:
            dByte64Str = str(dByte64, "utf-8")
        except MemoryError:
            print("str(dByte64) failed")
            return None
        return dByte64Str

    def _str2Dict(self, dByte64Str):
        dByte64 = str.encode(dByte64Str)
        dByte = base64.b64decode(dByte64)
        dStr = str(dByte, "utf-8")
        d = simplejson.loads(dStr)
        return d

    def _makeSqlFromSpec(self, spec):
        sql = "select "
        if 'known' in spec:
            numKnown = len(spec['known'])
        else:
            numKnown = 0
        if 'guess' in spec:
            numGuess = len(spec['guess'])
        else:
            numGuess = 0
        if self._cr == 'inference':
            sql += str(f"{self._p['uid']}, ")
            for i in range(numGuess):
                sql += str(f"{spec['guess'][i]['col']}")
                if i == (numGuess - 1):
                    sql += " "
                else:
                    sql += ", "
            sql += str(f"from {self._p['table']} ")
            if numKnown:
                sql += "where "
            for i in range(numKnown):
                sql += str(f"{spec['known'][i]['col']} = ")
                sql += str(f"'{spec['known'][i]['val']}' ")
                if i == (numKnown - 1):
                    sql += " "
                else:
                    sql += "and "
        elif self._cr == 'singlingOut' or self._cr == 'linkability':
            sql += str(f"{self._p['uid']} from {self._p['table']} where ")
            for i in range(numKnown):
                sql += str(f"{spec['known'][i]['col']} = ")
                sql += str(f"'{spec['known'][i]['val']}' and ")
            for i in range(numGuess):
                sql += str(f"{spec['guess'][i]['col']} = ")
                sql += str(f"'{spec['guess'][i]['val']}' ")
                if i == (numGuess - 1):
                    sql += " "
                else:
                    sql += "and "
        return sql

    def _makeSqlConfFromSpec(self, spec):
        sqls = []
        numGuess = len(spec['guess'])
        if self._cr == 'inference' or self._cr == 'singlingOut':
            sql = str(f"select count(distinct {self._p['uid']}) from {self._p['table']} where ")
            # This first sql learns the number of rows matching the
            # guessed values
            for i in range(numGuess):
                sql += str(f"{spec['guess'][i]['col']} = ")
                sql += str(f"'{spec['guess'][i]['val']}'")
                if i != (numGuess - 1):
                    sql += " and "
            sqls.append(sql)
            # This second sql learns the total number of rows (should
            # normally be a cached result)
            sql = str(f"select count(distinct {self._p['uid']}) from {self._p['table']}")
            sqls.append(sql)
        elif self._cr == 'linkability':
            # nothing happens for linkability
            pass
        return sqls

    def _addToAtkRes(self, label, spec, val):
        """Adds the value to each column in the guess"""
        for tup in spec['guess']:
            col = tup['col']
            if col not in self._atrs['col']:
                print(f"Error: addToAtkRes(): Bad column in spec: '{col}'")
                self.cleanUp(cleanUpCache=False, doExit=True)
            if label not in self._atrs['col'][col]:
                print(f"Error: addToAtkRes(): Bad label '{label}'")
                self.cleanUp(cleanUpCache=False, doExit=True)
            self._atrs['col'][col][label] += val

    def _initAtkRes(self):
        self._atrs = {}
        self._atrs['attack'] = {}
        self._atrs['base'] = {}
        self._atrs['tableStats'] = {}
        self._atrs['col'] = {}
        # ----- Attack parameters
        self._atrs['attack']['attackName'] = self._p['name']
        self._atrs['attack']['rawDb'] = self._p['rawDb']
        self._atrs['attack']['anonDb'] = self._p['anonDb']
        if self._cr == 'linkability':
            self._atrs['attack']['pubDb'] = self._p['anonDb']
        self._atrs['attack']['criteria'] = self._p['criteria']
        self._atrs['attack']['table'] = self._p['table']
        # add parameters for the database machine itself
        db = getDatabaseInfo(self._p['rawDb'])
        self._atrs['attack']['rawHost'] = db['host']
        self._atrs['attack']['rawDbName'] = db['dbname']
        self._atrs['attack']['rawPort'] = db['port']
        if self._cr == 'linkability':
            db = getDatabaseInfo(self._p['pubDb'])
            self._atrs['attack']['pubHost'] = db['host']
            self._atrs['attack']['pubDbName'] = db['dbname']
            self._atrs['attack']['pubPort'] = db['port']
        db = getDatabaseInfo(self._p['anonDb'])
        self._atrs['attack']['anonHost'] = db['host']
        self._atrs['attack']['anonDbName'] = db['dbname']
        self._atrs['attack']['anonPort'] = db['port']
        # and a timestamp
        self._atrs['attack']['startTime'] = str(datetime.datetime.now())
        # ----- Params for computing knowledge:
        # number of prior knowledge cells requested
        self._atrs['base']['knowledgeCells'] = 0
        # number of times knowledge was queried
        self._atrs['base']['knowledgeGets'] = 0

        # ----- Params for computing how much work needed to attack:
        # number of attack cells requested
        self._atrs['base']['attackCells'] = 0
        # number of times attack was queried
        self._atrs['base']['attackGets'] = 0
        self._atrs['tableStats']['colNamesAndTypes'] = self._colNamesTypes
        self._atrs['tableStats']['numColumns'] = len(self._colNamesTypes)
        for tup in self._colNamesTypes:
            col = tup[0]
            if self._vb: print(f"initAtkRes() init column '{col}'")
            self._atrs['col'][col] = {}

            # ----- Params for computing claim success rate:
            # total possible number of claims
            self._atrs['col'][col]['claimTrials'] = 0
            # actual number of claims
            self._atrs['col'][col]['claimMade'] = 0
            # number of correct claims
            self._atrs['col'][col]['claimCorrect'] = 0
            # number of claims that produced bad SQL answer
            self._atrs['col'][col]['claimError'] = 0
            # claims where the attacker chose to pass (not make a claim),
            # but where the claim would have been correct
            self._atrs['col'][col]['claimPassCorrect'] = 0

            # ----- Params for computing confidence:
            # sum of all known count to full count ratios
            self._atrs['col'][col]['sumConfidenceRatios'] = 0
            # number of such ratios
            self._atrs['col'][col]['numConfidenceRatios'] = 0
            # average confidence ratio (division of above two params)
            self._atrs['col'][col]['avgConfidenceRatios'] = 0

    def _initOp(self):
        self._op['numQueries'] = 0
        self._op['timeQueries'] = 0
        self._op['numCachePuts'] = 0
        self._op['timeCachePuts'] = 0
        self._op['numCacheGets'] = 0
        self._op['timeCacheGets'] = 0

    def _initCounters(self):
        self._exploreCounter = 0
        self._knowledgeCounter = 0
        self._attackCounter = 0
        self._claimCounter = 0
        self._guessCounter = 0

    def _initUberDPSession(self):
        # Client establishes a session
        session = requests.Session()
        session.get_orig, session.get = session.get, functools.partial(session.get, timeout=20)

        # remember the session to close it if necessary
        self._session = session

        # function to initialize the session with the dp server
        try:
            # this is the initial query.
            # its only purpose is to obtain a session ID and to define a budget
            # The budget is set in the initial request only
            # Once the budget is set, no further modification to the budget
            # is possible in subsequent requests
            request = {
                'query': "",  # empty query, just serves to get a session ID
                'epsilon': '0.0',  # nothing used up in the initialization phase
                'budget': str(self._remaining_dp_budget),  # the numeric values are sent as strings
                'dbname': self._p['rawDb']['dbname'], # name of the raw db
                'sid': ''  # When sid is Null it indicates start of a session
                }
            # the database for anonymization here is uber
            url = self._p['anonDb']['host']
            headers = {'Content-Type': 'application/json',
                       'Accept': 'application/json'}


            # Client stores the response sent by the simpleServer.py
            response = requests.get(url, json=request, headers=headers, timeout=20, verify=True)
            resp = response.json()  # Convert response sent by server to JSON

            if 'Error' in resp['Server Response']:
                pprint.pprint(resp)  # Client prints the data returned by the server
            else:   # if no error was encountered
                if self._vb:
                    pprint.pprint("Setting up connection with Uber_DP Server")
                    pprint.pprint(resp)  # Client prints the data returned by the server

                # in case there is no error, but we are at the "dummy query" to get the session ID
                self._sid = resp['Server Response']['Session ID']  # Set Session ID to value returned by server


        except requests.ConnectionError as e:
            print("Connection Error. Make sure you are connected to Internet.")
            print(str(e))

        except requests.Timeout as e:
            print("Timeout Error")
            print(str(e))

        except requests.RequestException as e:
            print("General Error")
            print(str(e))

        except KeyboardInterrupt:
            print("Program closed")

class EnhancedThread(threading.Thread):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.daemon = True
        self._stopFlag = threading.Event()

    def stop(self):
        self._stopFlag.set()

    def stopped(self):
        return self._stopFlag.isSet()


class CacheThread(EnhancedThread):
    def __init__(self, theQueue, atcObject):
        super().__init__()
        self.theQueue = theQueue
        self.atcObject = atcObject
        self.name = self.getName() + " (cache thread)"
        self.dbConnection = None

    def run(self):
        while True:
            if self.stopped():
                logging.info(f' > {self.getName()} stopped.')
                break
            try:
                data = self.theQueue.get(block=True, timeout=3)
            except queue.Empty:
                continue
            if data is not None:
                self.atcObject.putCacheWrapper(*data)
                self.dbConnection = data[0] # this is connInsert for closing later
                if self.atcObject._p['verbose'] or self.atcObject._vb:
                    printTitle('cache insert successful. queue length: ' + str(self.theQueue.qsize()))
            self.theQueue.task_done()

    def stop(self):
        logging.debug("CacheThread received stop signal")
        super().stop()
        if self.dbConnection:
            try:
                self.dbConnection.interrupt()
            except sqlite3.ProgrammingError:
                pass
            else:
                logging.debug("interrupt signal sent to cacheDb for safe deleting cacheDb file later.")



def cleanBgThreads():
    for t in threading.enumerate():
        if isinstance(t, EnhancedThread) and (not t.stopped()):
            t.stop()
            t.join(timeout=1.0)

def printTitle(text):
    print(f'\n{" "+text:->46}\n')

def signal_kill_handler(signum, frame):
    global atcObject
    printTitle("Terminating the program ...")
    thread_info = (
        (f'    >> {set([t.name for t in threading.enumerate() if t != threading.main_thread()])} \n'
        f' > sending termination signal to all. please wait ... ') if threading.active_count() > 1
                                                                   else ''
    )
    logging.info(f'\n > active background threads: {threading.active_count() - 1} \n'
                 f'{thread_info}')
    # logging.info(f'\n > active background threads: {threading.active_count() - 1} \n'
    #              f'    >> {set([t.name for t in threading.enumerate() if t != threading.main_thread()])} \n'
    #              f' > sending termination signal to all. please wait ... ')
    cleanBgThreads()
    if atcObject:
        atcObject.cleanUp(cleanUpCache=False)

    sys.exit(-1)

def on_exit():
    if len([t for t in threading.enumerate() if isinstance(t, EnhancedThread) and (not t.stopped())]):
        cleanBgThreads()
    while threading.active_count() > 1:
        pass

signal.signal(signal.SIGTERM, signal_kill_handler)
signal.signal(signal.SIGINT, signal_kill_handler)

atexit.register(on_exit)

Classes

class gdaAttack (params)

Manages a GDA Attack

WARNING: this code is fragile, and can fail ungracefully, or just hang.

Everything gets set up with 'gdaAttack(params)'

params is a dictionary containing the following required parameters:
param['name']: The name of the attack. Make it unique, because the cache is discovered using this name.
param['rawDb']: The label for the DB to be used as the raw (non-anonymized) DB.
Following are the optional parameters:
param['criteria']: The criteria by which the attack should determined to succeed or fail. Must be one of 'singlingOut', 'inference', or 'linkability'. Default is 'singlingOut'.
param['anonDb']: The label for the DB to be used as the anonymized DB. (Is automatically set to param['rawDb'] if not set.)
param['pubDb']: The label for the DB to be used as the publicly known DB in linkability attacks.
param['table']: The table to be attacked. Must be present if the DB has more than one table.
param['uid']: The uid column for the table. Must be present if the name of the column is other than 'uid'.
param['flushCache']: Set to true if you want the cache of query answers from a previous run flushed. The purpose of the cache is to save the work from an aborted attack, which can be substantial because attacks can have hundreds of queries.
param['locCacheDir']: The directory holding the cache DBs. Default 'cacheDBs'.
param['numRawDbThreads']: The number of parallel queries that can be made to the raw DB. Default 3.
param['numAnonDbThreads']: The number of parallel queries that can be made to the anon DB. Default 3.
param['numPubDbThreads']: The number of parallel queries that can be made to the public linkability DB. Default 3.
param['verbose']: Set to True for verbose output.

param['dp_budget']: An optional overall privacy budget for the attack. For use with uber_dp. Default 'None'.

Expand source code

class gdaAttack:
    """Manages a GDA Attack

       WARNING: this code is fragile, and can fail ungracefully, or
       just hang."""

    def __init__(self, params):
        """ Everything gets set up with 'gdaAttack(params)'

            params is a dictionary containing the following
            required parameters: <br/>
            `param['name']`: The name of the attack. Make it unique, because
            the cache is discovered using this name. <br/>
            `param['rawDb']`: The label for the DB to be used as the
            raw (non-anonymized) DB. <br/>
            Following are the optional parameters: <br/>
            `param['criteria']`: The criteria by which the attack should
            determined to succeed or fail. Must be one of 'singlingOut',
            'inference', or 'linkability'. Default is 'singlingOut'. <br/>
            `param['anonDb']`: The label for the DB to be used as the
            anonymized DB. (Is automatically set to `param['rawDb']` if
            not set.) <br/>
            `param['pubDb']`: The label for the DB to be used as the
            publicly known DB in linkability attacks. <br/>
            `param['table']`: The table to be attacked. Must be present
            if the DB has more than one table. <br/>
            `param['uid']`: The uid column for the table. Must be present
            if the name of the column is other than 'uid'. <br/>
            `param['flushCache']`: Set to true if you want the cache of
            query answers from a previous run flushed. The purpose of the
            cache is to save the work from an aborted attack, which can be
            substantial because attacks can have hundreds of queries. <br/>
            `param['locCacheDir']`: The directory holding the cache DBs.
            Default 'cacheDBs'. <br/>
            `param['numRawDbThreads']`: The number of parallel queries
            that can be made to the raw DB. Default 3. <br/>
            `param['numAnonDbThreads']`: The number of parallel queries
            that can be made to the anon DB. Default 3. <br/>
            `param['numPubDbThreads']`: The number of parallel queries
            that can be made to the public linkability DB. Default 3. <br/>
            `param['verbose']`: Set to True for verbose output.

            `param['dp_budget']`: An optional overall privacy budget for the attack. For use with uber_dp. Default 'None'. <br/>
        """

        #### gda-score-code version check warning ####
        process = subprocess.run([sys.executable, "-m", "pip", "list","--outdated"],stdout=subprocess.PIPE,stderr=subprocess.PIPE,universal_newlines=True)
        upgradable_pkgs = process.stdout
        if "gda-score-code" in upgradable_pkgs:
            pkgs = upgradable_pkgs.split('\n')
            potential_gdascore_pkgs = list(filter(lambda x: 'gda-score-code' in x, pkgs))
            if len(potential_gdascore_pkgs) == 1:
                gdascore_pkg = potential_gdascore_pkgs[0]
                pkg_name, curr_ver, latest_ver, ins_type = (re.sub(r'\s+', '|', gdascore_pkg)
                                                               .split('|'))
                print('\n')
                logging.warning(f'WARNING: You have {pkg_name} version {curr_ver} installed; '
                                f'however, version {latest_ver} is available.')
                logging.warning(f'You should consider upgrading via the '
                                f'"pip install --upgrade {pkg_name}" command.')
                print('\n')
        ########

        ########### added by frzmohammadali ##########
        global theCacheQueue
        global theCacheThreadObject
        global flgCacheThreadStarted
        global atcObject

        if not theCacheQueue and not theCacheThreadObject:
            theCacheQueue = queue.Queue()
            theCacheThreadObject = CacheThread(theCacheQueue, self)
            atcObject = self
            printTitle('cache thread initialized.')

        self.cacheQueue = theCacheQueue
        self.cacheThreadObject = theCacheThreadObject
        if not flgCacheThreadStarted:
            self.cacheThreadObject.start()
            flgCacheThreadStarted = True
        ##############################################

        ############## parameters and instance variables ###############
        # ------------- Class called parameters and configured parameters
        self._vb = False
        self._cr = ''  # short for criteria
        self._pp = None  # pretty printer (for debugging)
        self._sid = None # for uber_dp interface, a session ID over the attack is needed
        self._session = None # also session for the uber_dp interface
        self._colNamesTypes = []
        self._colNames = []
        self._p = dict(name='',
                  rawDb='',
                  anonDb='',
                  pubDb='',
                  criteria='singlingOut',
                  table='',
                  uid='uid',
                  flushCache=False,
                  verbose=False,
                  # following not normally set by caller, but can be
                  locCacheDir="cacheDBs",
                  numRawDbThreads=3,
                  numAnonDbThreads=3,
                  numPubDbThreads=3,
                  )
        self._requiredParams = ['name', 'rawDb']

        # ---------- Private internal state
        # Threads
        self._rawThreads = []
        self._anonThreads = []
        self._pubThreads = []
        # Queues read by database threads _rawThreads and _anonThreads
        self._rawQ = None
        self._anonQ = None
        self._pubQ = None
        # Queues read by various caller functions
        self._exploreQ = None
        self._knowledgeQ = None
        self._attackQ = None
        self._claimQ = None
        self._guessQ = None
        # ask/get counters for setting 'stillToCome'
        self._exploreCounter = 0
        self._knowledgeCounter = 0
        self._attackCounter = 0
        self._claimCounter = 0
        self._guessCounter = 0
        # State for duplicate claim detection
        self._dupCheck = DupCheck()
        # State for computing attack results (see _initAtkRes())
        self._atrs = {}
        # State for various operational measures (see _initOp())
        self._op = {}
        ##############################################

        if self._vb:
            print(f"Calling {__name__}.init")
        if self._vb:
            print(f"   {params}")
        self._initOp()
        self._initCounters()
        self._assignGlobalParams(params)
        self._doParamChecks()
        for param in self._requiredParams:
            if len(self._p[param]) == 0:
                s = str(f"Error: Need param '{param}' in class parameters")
                sys.exit(s)

        # extract the type of interface we are interacting with the anonymization
        self._type = self._p['anonDb']['type']
        if self._type == 'uber_dp':
            # cannot run attack on uber dp without specifying the budget
            if self._p['dp_budget'] is None:
                s = str(f"Error: Needs param dp_budget in class parameters when running uber_dp attacks")
                sys.exit(s)

            # Assign the privacy budget as a parameter to the attack
            self._remaining_dp_budget = self._p['dp_budget']
            self._initUberDPSession()

            # if no session id was set, the attacks cannot be conducted
            if self._sid is None:
                s = str(f"Failed initializing session with Uber_DP Server")
                sys.exit(s)
        # create the database directory if it doesn't exist
        try:
            if not os.path.exists(self._p['locCacheDir']):
                os.makedirs(self._p['locCacheDir'])
        except OSError:
            sys.exit("Error: Creating directory. " + self._p['locCacheDir'])

        # Get the table name if not provided by the caller
        if len(self._p['table']) == 0:
            tables = self.getTableNames()
            if len(tables) != 1:
                print("Error: gdaAttack(): Must include table name if " +
                      "there is more than one table in database")
                sys.exit()
            self._p['table'] = tables[0]

        # Get the column names for computing susceptibility later
        self._colNamesTypes = self.getColNamesAndTypes()
        if self._vb:
            print(f"Columns are '{self._colNamesTypes}'")
        self._initAtkRes()
        # And make a convenient list of column names
        for colNameType in self._colNamesTypes:
            self._colNames.append(colNameType[0])

        # Setup the database which holds already executed queries so we
        # don't have to repeat them if we are restarting
        self._setupLocalCacheDB()
        # Setup the threads and queues
        self._setupThreadsAndQueues()
        numThreads = threading.active_count()
        expectedThreads = (self._p['numRawDbThreads'] +
                           self._p['numAnonDbThreads'] + 1)
        if len(self._p['pubDb']) > 0:
            expectedThreads += self._p['numPubDbThreads']
        if numThreads < expectedThreads:
            print(f"Error: Some thread(s) died "
                  f"(count {numThreads}, expected {expectedThreads}). "
                  f"Aborting.")
            self.cleanUp(cleanUpCache=False, doExit=True)

    def getResults(self):
        """ Returns all of the compiled attack results.

            This can be input to class `gdaScores()` and method
            `gdaScores.addResult()`."""
        # Add the operational parameters
        self._atrs['operational'] = self.getOpParameters()
        self._cleanPasswords()
        return self._atrs

    def getOpParameters(self):
        """ Returns a variety of performance measurements.

            Useful for debugging."""
        self._op['avQueryDuration'] = 0
        if self._op['numQueries'] > 0:
            self._op['avQueryDuration'] = (
                    self._op['timeQueries'] / self._op['numQueries'])
        self._op['avCachePutDuration'] = 0
        if self._op['numCachePuts'] > 0:
            self._op['avCachePutDuration'] = (
                    self._op['timeCachePuts'] / self._op['numCachePuts'])
        self._op['avCacheGetDuration'] = 0
        if self._op['numCacheGets'] > 0:
            self._op['avCacheGetDuration'] = (
                    self._op['timeCacheGets'] / self._op['numCacheGets'])
        return self._op

    def setVerbose(self):
        """Sets Verbose to True"""
        self._vb = True

    def unsetVerbose(self):
        """Sets Verbose to False"""
        self._vb = False

    def cleanUp(self, cleanUpCache=True, doExit=False,
                exitMsg="Finished cleanUp, exiting"):
        """ Garbage collect queues, threads, and cache.

            By default, this wipes the cache. The idea being that if the
            entire attack finished successfully, then it won't be
            repeated and the cache isn't needed. Do `cleanUpCache=False`
            if that isn't what you want."""
        if self._vb: print(f"Calling {__name__}.cleanUp")
        if self._rawQ.empty() != True:
            logging.warning("Warning, trying to clean up when raw queue not empty!")
        if self._anonQ.empty() != True:
            logging.warning("Warning, trying to clean up when anon queue not empty!")
        if self.cacheQueue.empty() != True:
            logging.warning("Warning, trying to clean up when cache queue not empty!")
        # Stuff in end signals for the workers (this is a bit bogus, cause
        # if a thread is gone or hanging, not all signals will get read)
        for i in range(self._p['numRawDbThreads']):
            self._rawQ.put(None)
        for i in range(self._p['numAnonDbThreads']):
            self._anonQ.put(None)

        for i in range(self.cacheQueue.qsize()):
            self.cacheQueue.put(None)

        cleanBgThreads()

        if len(self._p['pubDb']) > 0:
            if self._pubQ.empty() != True:
                print("Warning, trying to clean up when pub queue not empty!")
            for i in range(self._p['numPubDbThreads']):
                self._pubQ.put(None)
            for t in self._pubThreads:
                if t.isAlive(): t.stop() # t.join()
        if cleanUpCache:
            self._removeLocalCacheDB()
        if self._session: # close the uber session
            self._session.close()
        if doExit:
            sys.exit(exitMsg)

    def isClaimed(self, spec):
        """Check if a claim was already fully or partially made.

        The `spec` is formatted identical to the `spec` in `gdaAttack.askClaim`."""
        return self._dupCheck.is_claimed(spec, verbose=self._vb)

    def askClaim(self, spec, cache=True, claim=True):
        """Generate Claim query for raw and optionally pub databases.

        Before anything happens, the system uses the `gdaAttack.isClaimed`
        method to determine whether a previous claim fully or partially
        matches the new claim. Such duplicates are not allowed and an error
        will be raised providing additional details about the duplicate.

        Making a claim results in a query to the raw database, and if
        linkability attack, the pub database, to check
        the correctness of the claim. Multiple calls to this method will
        cause the corresponding queries to be queued up, so `askClaim()`
        returns immediately. `getClaim()` harvests one claim result. <br/>
        Set `claim=False` if this claim should not be applied to the
        confidence improvement score. In this case, the probability score
        will instead be reduced accordingly. <br/>
        The `spec` is formatted as follows: <br/>

            {'known':[{'col':'colName','val':'value'},...],
              'guess':[{'col':'colName','val':'value'},...],
            }

        `spec['known']` are the columns and values the attacker already knows
        (i.e. with prior knowledge). Optional. <br/>
        `spec['guess']` are the columns and values the attacker doesn't know,
        but rather is trying to predict. Mandatory for 'singling out'
        and 'inference'. Optional for 'linkabiblity' <br/>
        Answers are cached <br/>
        Returns immediately"""
        if self._vb: print(f"Calling {__name__}.askClaim with spec '{spec}', count {self._claimCounter}")
        if not self._dupCheck.is_claimed(spec, verbose=self._vb, raise_true=True):
            self._dupCheck.claim(spec, verbose=self._vb)
        self._claimCounter += 1
        sql = self._makeSqlFromSpec(spec)
        if self._vb: print(f"Sql is '{sql}'")
        sqlConfs = self._makeSqlConfFromSpec(spec)
        if self._vb: print(f"SqlConf is '{sqlConfs}'")
        # Make a copy of the query for passing around
        job = {}
        job['q'] = self._claimQ
        job['claim'] = claim
        job['queries'] = [{'sql': sql, 'cache': cache}]
        job['spec'] = spec
        for sqlConf in sqlConfs:
            job['queries'].append({'sql': sqlConf, 'cache': cache})
        self._rawQ.put(job)

    def getClaim(self):
        """ Wait for and gather results of askClaim() calls

            Returns a data structure that contains both the result
            of one finished claim, and the claim's input parameters.
            Note that the order in which results are returned by
            `getClaim()` are not necessarily the same order they were
            inserted by `askClaim()`. <br/>
            Assuming `result` is returned: <br/>
            `result['claim']` is the value supplied in the corresponding
            `askClaim()` call <br/>
            `result['spec']` is a copy of the `spec` supplied in the
            corresponding `askClaim()` call. <br/>
            `result['queries']` is a list of the queries generated in order to
            validate the claim. <br/>
            `result['answers']` are the answers to the queries in
            `result['queries']`. <br/>
            `result['claimResult']` is 'Correct' or 'Incorrect', depending
            on whether the claim satisfies the critieria or not. <br/>
            `result['stillToCome']` is a counter showing how many more
            claims are still queued. When `stillToCome` is 0, then all
            claims submitted by `askClaim()` have been returned."""

        if self._vb:
            print(f"Calling {__name__}.getClaim")
        if self._claimCounter == 0:
            # Caller shouldn't be calling if there are no expected
            # answers, but is anyway, so just return
            return {'query': {'sql': 'None'}, 'error': 'Nothing to do',
                    'stillToCome': 0, 'claimResult': 'Error'}
        job = self._claimQ.get()
        claim = job['claim']
        self._claimQ.task_done()
        self._claimCounter -= 1
        job['stillToCome'] = self._claimCounter
        self._addToAtkRes('claimTrials', job['spec'], 1)
        # The claim is tested against the first reply
        reply = job['replies'][0]
        job['claimResult'] = 'Wrong'
        if claim:
            self._addToAtkRes('claimMade', job['spec'], 1)
        if 'error' in reply:
            self._addToAtkRes('claimError', job['spec'], 1)
            job['claimResult'] = 'Error'
        else:
            if self._cr == 'singlingOut':
                claimIsCorrect = self._checkSinglingOut(reply['answer'])
            elif self._cr == 'inference':
                claimIsCorrect = self._checkInference(reply['answer'])
            elif self._cr == 'linkability':
                claimIsCorrect = self._checkLinkability(reply['answer'])
            if claim == 1 and claimIsCorrect:
                self._addToAtkRes('claimCorrect', job['spec'], 1)
                job['claimResult'] = 'Correct'
            elif claim == 0 and claimIsCorrect:
                self._addToAtkRes('claimPassCorrect', job['spec'], 1)
                job['claimResult'] = 'Correct'
        if self._cr == 'singlingOut' or self._cr == 'inference':
            # Then measure confidence against the second and third replies
            if 'answer' in job['replies'][1]:
                if job['replies'][1]['answer']:
                    guessedRows = job['replies'][1]['answer'][0][0]
                else:
                    guessedRows = 0
            elif 'error' in job['replies'][1]:
                self._pp.pprint(job)
                print(f"Error: conf query:\n{job['replies'][1]['error']}")
                self.cleanUp(cleanUpCache=False, doExit=True)
            if 'answer' in job['replies'][2]:
                if job['replies'][2]['answer']:
                    totalRows = job['replies'][2]['answer'][0][0]
                else:
                    totalRows = 0
            elif 'error' in job['replies'][2]:
                self._pp.pprint(job)
                print(f"Error: conf query:\n{job['replies'][2]['error']}")
                self.cleanUp(cleanUpCache=False, doExit=True)
            if totalRows:
                self._addToAtkRes('sumConfidenceRatios', job['spec'],
                                  guessedRows / totalRows)
                self._addToAtkRes('numConfidenceRatios', job['spec'], 1)
                self._atrs['tableStats']['totalRows'] = totalRows
        else:
            # For linkability, the confidence is always 1/2
            self._addToAtkRes('sumConfidenceRatios', job['spec'], 0.5)
            self._addToAtkRes('numConfidenceRatios', job['spec'], 1)
        if 'q' in job:
            del job['q']
        return (job)

    def askAttack(self, query, cache=True):
        """ Generate and queue up an attack query for database.

            `query` is a dictionary with (currently) one value: <br/>
            `query['sql']` contains the SQL query. <br/>
            `query['epsilon']` is optional, and defines how much of the differential privacy budget is used for uber_dp <br/>
        """
        self._attackCounter += 1
        if self._vb: print(f"Calling {__name__}.askAttack with query '{query}', count {self._attackCounter}")
        # Make a copy of the query for passing around
        qCopy = copy.copy(query)
        job = {}
        job['q'] = self._attackQ
        qCopy['cache'] = cache
        job['queries'] = [qCopy]
        self._anonQ.put(job)

    def getAttack(self):
        """ Returns the result of one askAttack() call

            Blocks until the result is available. Note that the order
            in which results are received is not necesarily the order
            in which `askAttack()` calls were made. <br/>
            Assuming `result` is returned: <br/>
            `result['answer']` is the answer returned by the DB. The
            format is: <br/>
                `[(C1,C2...,Cn),(C1,C2...,Cn), ... (C1,C2...,Cn)]` <br/>
            where C1 is the first element of the `SELECT`, C2 the second
            element, etc. This attribute does not exist in cases of query
            error (i.e. bad sql, budget exceeded if uber_dp, etc.) <br/>
            `result['cells']` is the number of cells returned in the answer
            (used by `gdaAttack()` to compute total attack cells) <br/>
            `result['query']['sql']` is the query from the corresponding
            `askAttack()`.
            `result['error']` contains the error description <br/>
            `result['remaining_dp_budget']` contains the remaining differential
            privacy budget when uber_dp is used. <br/>
            """

        if self._vb:
            print(f"Calling {__name__}.getAttack")
        if self._attackCounter == 0:
            # Caller shouldn't be calling if there are no expected
            # answers, but is anyway, so just return
            return {'query': {'sql': 'None'}, 'error': 'Nothing to do',
                    'stillToCome': 0}
        job = self._attackQ.get()
        self._attackQ.task_done()
        self._attackCounter -= 1
        reply = job['replies'][0]
        reply['stillToCome'] = self._attackCounter
        self._atrs['base']['attackGets'] += 1
        if 'cells' in reply:
            if reply['cells'] == 0:
                self._atrs['base']['attackCells'] += 1
            else:
                self._atrs['base']['attackCells'] += reply['cells']
        else:
            self._atrs['base']['attackCells'] += 1

        if self._type == 'uber_dp':
            reply['remaining_dp_budget'] = self._remaining_dp_budget
        return (reply)

    def askKnowledge(self, query, cache=True):
        """ Generate and queue up a prior knowledge query for database

            The class keeps track of how many prior knowledge cells were
            returned and uses this to compute a score. <br/>
            Input parameters formatted the same as with `askAttack()`"""

        self._knowledgeCounter += 1
        if self._vb: print(f"Calling {__name__}.askKnowledge with query "
                           f"'{query}', count {self._knowledgeCounter}")
        # Make a copy of the query for passing around
        qCopy = copy.copy(query)
        job = {}
        job['q'] = self._knowledgeQ
        qCopy['cache'] = cache
        job['queries'] = [qCopy]
        self._rawQ.put(job)

    def getKnowledge(self):
        """ Wait for and gather results of prior askKnowledge() calls

            Blocks until the result is available. Note that the order
            in which results are received is not necesarily the order
            in which `askKnowledge()` calls were made. <br/>
            Return parameter formatted the same as with `getAttack()`"""

        if self._vb:
            print(f"Calling {__name__}.getKnowledge")
        if self._knowledgeCounter == 0:
            # Caller shouldn't be calling if there are no expected
            # answers, but is anyway, so just return
            return {'query': {'sql': 'None'}, 'error': 'Nothing to do',
                    'stillToCome': 0}
        job = self._knowledgeQ.get()
        self._knowledgeQ.task_done()
        self._knowledgeCounter -= 1
        reply = job['replies'][0]
        reply['stillToCome'] = self._knowledgeCounter
        self._atrs['base']['knowledgeGets'] += 1
        if 'cells' in reply:
            self._atrs['base']['knowledgeCells'] += reply['cells']
        return (reply)

    def askExplore(self, query, cache=True):
        """ Generate and queue up an exploritory query for database

            No score book-keeping is done here. An analyst may make
            any number of queries without impacting the GDA score. <br/>
            `query` is a dictionary with two values: <br/>
            `query['sql']` contains the SQL query. <br/>
            `query['db']` determines which database is queried, and
            is one of 'rawDb', 'anonDb', or (if linkability), 'pubDb'."""

        self._exploreCounter += 1
        if self._vb: print(f"Calling {__name__}.askExplore with "
                           f"query '{query}', count {self._exploreCounter}")
        # Make a copy of the query for passing around
        qCopy = copy.copy(query)
        job = {}
        job['q'] = self._exploreQ
        qCopy['cache'] = cache
        job['queries'] = [qCopy]
        if qCopy['db'] == 'rawDb' or qCopy['db'] == 'raw':
            self._rawQ.put(job)
        elif qCopy['db'] == 'anonDb' or qCopy['db'] == 'anon':
            self._anonQ.put(job)
        else:
            self._pubQ.put(job)

    def getExplore(self):
        """ Wait for and gather results of prior askExplore() calls.

            Blocks until the result is available. Note that the order
            in which results are received is not necesarily the order
            in which `askExplore()` calls were made. <br/>
            Return parameter formatted the same as with `getAttack()`"""
        if self._vb:
            print(f"Calling {__name__}.getExplore")
        if self._exploreCounter == 0:
            # Caller shouldn't be calling if there are no expected
            # answers, but is anyway, so just return
            return {'query': {'sql': 'None'}, 'error': 'Nothing to do',
                    'stillToCome': 0}
        job = self._exploreQ.get()
        self._exploreQ.task_done()
        self._exploreCounter -= 1
        reply = job['replies'][0]
        reply['stillToCome'] = self._exploreCounter
        return (reply)

    def getPublicColValues(self, colName, tableName=''):
        """Return list of "publicly known" column values and counts

        Column value has index 0, count of distinct UIDs has index 1
        Must specify column name.
        """
        if len(colName) == 0:
            print(f"Must specify column 'colName'")
            return None

        if len(tableName) == 0:
            # caller didn't supply a table name, so get it from the
            # class init
            tableName = self._p['table']

        # Establish connection to database
        db = getDatabaseInfo(self._p['rawDb'])
        connStr = str(
            f"host={db['host']} port={db['port']} dbname={db['dbname']} user={db['user']} password={db['password']}")
        conn = psycopg2.connect(connStr)
        cur = conn.cursor()
        # First we need to know the total number of distinct users
        sql = str(f"""select count(distinct {self._p['uid']})
                      from {tableName}""")
        try:
            cur.execute(sql)
        except psycopg2.Error as e:
            print(f"Error: getPublicColValues() query: '{e}'")
            self.cleanUp(cleanUpCache=False, doExit=True)
        ans = cur.fetchall()
        numUid = ans[0][0]
        # Query the raw db for values in the column
        sql = str(f"""select {colName}, count(distinct {self._p['uid']})
                      from {tableName}
                      group by 1
                      order by 2 desc
                      limit 200""")
        try:
            cur.execute(sql)
        except psycopg2.Error as e:
            print(f"Error: getPublicColValues() query: '{e}'")
            self.cleanUp(cleanUpCache=False, doExit=True)
        ans = cur.fetchall()
        ret = []
        for row in ans:
            # row[0] is the value, row[1] is the count
            if (((row[1] / numUid) > 0.002) and
                    (row[1] >= 50)):
                ret.append((row[0], row[1]))
        conn.close()
        return ret

    def getColNames(self, dbType='rawDb', tableName=''):
        """Return simple list of column names

        `dbType` is one of 'rawDb' or 'anonDb'"""

        if len(tableName) == 0:
            colsAndTypes = self.getColNamesAndTypes(dbType=dbType)
        else:
            colsAndTypes = self.getColNamesAndTypes(
                dbType=dbType, tableName=tableName)
        if not colsAndTypes:
            return None
        cols = []
        for tup in colsAndTypes:
            cols.append(tup[0])
        return cols

    def getAttackTableName(self):
        """Returns the name of the table being used in the attack."""
        return self._p['table']

    def getTableCharacteristics(self, tableName=''):
        """Returns the full contents of the table characteristics

           Return value is a dict indexed by column name: <br/>

               { '<colName>':
                   {
                       'av_rows_per_vals': 3.93149,
                       'av_uids_per_val': 0.468698,
                       'column_label': 'continuous',
                       'column_name': 'dropoff_latitude',
                       'column_type': 'real',
                       'max': '898.29382000000000',
                       'min': '-0.56333297000000',
                       'num_distinct_vals': 24216,
                       'num_rows': 95205,
                       'num_uids': 11350,
                       'std_rows_per_val': 10.8547,
                       'std_uids_per_val': 4.09688},
                   }
               }

        """
        if len(tableName) == 0:
            # caller didn't supply a table name, so get it from the
            # class init
            tableName = self._p['table']

        # Modify table name to the default for the characteristics table
        tableName += '_char'

        # Establish connection to database
        db = getDatabaseInfo(self._p['rawDb'])
        connStr = str(
            f"host={db['host']} port={db['port']} dbname={db['dbname']} user={db['user']} password={db['password']}")
        conn = psycopg2.connect(connStr)
        cur = conn.cursor()
        # Set up return dict
        ret = {}
        # Query it for column names
        sql = str(f"""select column_name, data_type 
                  from information_schema.columns where
                  table_name='{tableName}'""")
        try:
            cur.execute(sql)
        except psycopg2.Error as e:
            print(f"Error: getTableCharacteristics() query: '{e}'")
            self.cleanUp(cleanUpCache=False, doExit=True)
        cols = cur.fetchall()
        # Make index for column name (should be 0, but just to be sure)
        for colNameIndex in range(len(cols)):
            if cols[colNameIndex][0] == 'column_name':
                break

        # Query it for table contents
        sql = str(f"SELECT * FROM {tableName}")
        try:
            cur.execute(sql)
        except psycopg2.Error as e:
            print(f"Error: getTableCharacteristics() query: '{e}'")
            self.cleanUp(cleanUpCache=False, doExit=True)
        ans = cur.fetchall()
        for row in ans:
            colName = row[colNameIndex]
            ret[colName] = {}
            for i in range(len(row)):
                ret[colName][cols[i][0]] = row[i]
        conn.close()
        return ret

    def getAnonTableCharacteristics(self, tableName=''):
        """Returns the full contents of the table characteristics

           Return value is a dict indexed by column name: <br/>

               { '<colName>':
                   {
                       'av_rows_per_vals': 3.93149,
                       'av_uids_per_val': 0.468698,
                       'column_label': 'continuous',
                       'column_name': 'dropoff_latitude',
                       'column_type': 'real',
                       'max': '898.29382000000000',
                       'min': '-0.56333297000000',
                       'num_distinct_vals': 24216,
                       'num_rows': 95205,
                       'num_uids': 11350,
                       'std_rows_per_val': 10.8547,
                       'std_uids_per_val': 4.09688},
                   }
               }

        """
        if len(tableName) == 0:
            # caller didn't supply a table name, so get it from the
            # class init
            tableName = self._p['table']

        # Modify table name to the default for the characteristics table
        # tableName += '_char'

        # Establish connection to database
        db = getDatabaseInfo(self._p['anonDb'])
        connStr = str(
            f"host={db['host']} port={db['port']} dbname={db['dbname']} user={db['user']} password={db['password']}")
        conn = psycopg2.connect(connStr)
        cur = conn.cursor()

        # Query it for column names
        sql = str(f"""select column_name, data_type 
                  from information_schema.columns
                  where table_schema NOT IN ('information_schema', 'pg_catalog') and 
                  table_name='{tableName}'""")
        try:
            cur.execute(sql)
        except psycopg2.Error as e:
            print(f"Error: getAnonTableCharacteristics() query: '{e}'")
            self.cleanUp(cleanUpCache=False, doExit=True)
        ans = cur.fetchall()

        # Set up return dict
        ret = {_row[0]: {'column_name': _row[0], 'column_type': _row[1]} for _row in ans}

        conn.close()
        return ret

    # Note that following is used internally, but we expose it to the
    # caller as well because it is a useful function for exploration
    def getColNamesAndTypes(self, dbType='rawDb', tableName=''):
        """Return raw database column names and types (or None if error)

        dbType is one of 'rawDb' or 'anonDb' <br/>
        return format: [(col,type),(col,type),...]"""
        if len(tableName) == 0:
            # caller didn't supply a table name, so get it from the
            # class init
            tableName = self._p['table']

        # Establish connection to database
        db = getDatabaseInfo(self._p[dbType])
        if db['type'] != 'postgres' and db['type'] != 'aircloak':
            print(f"DB type '{db['type']}' must be 'postgres' or 'aircloak'")
            return None
        connStr = str(
            f"host={db['host']} port={db['port']} dbname={db['dbname']} user={db['user']} password={db['password']}")
        conn = psycopg2.connect(connStr)
        cur = conn.cursor()
        # Query it for column names
        if db['type'] == 'postgres':
            sql = str(f"""select column_name, data_type 
                      from information_schema.columns where
                      table_name='{tableName}'""")
        elif db['type'] == 'aircloak':
            sql = str(f"show columns from {tableName}")
        try:
            cur.execute(sql)
        except psycopg2.Error as e:
            print(f"Error: getColNamesAndTypes() query: '{e}'")
            self.cleanUp(cleanUpCache=False, doExit=True)
        ans = cur.fetchall()
        ret = []
        for row in ans:
            ret.append((row[0], row[1]))
        conn.close()
        return ret

    def getTableNames(self, dbType='rawDb'):
        """Return database table names

        dbType is one of 'rawDb' or 'anonDb' <br/>
        Table names returned as list, unless error then return None"""

        # Establish connection to database
        db = getDatabaseInfo(self._p[dbType])
        if db['type'] != 'postgres' and db['type'] != 'aircloak':
            print(f"DB type '{db['type']}' must be 'postgres' or 'aircloak'")
            return None
        connStr = str(
            f"host={db['host']} port={db['port']} dbname={db['dbname']} user={db['user']} password={db['password']}")
        conn = psycopg2.connect(connStr)
        cur = conn.cursor()
        # Query it for column names
        if db['type'] == 'postgres':
            sql = """SELECT tablename
                     FROM pg_catalog.pg_tables
                     WHERE schemaname != 'pg_catalog' AND
                           schemaname != 'information_schema'"""
        elif db['type'] == 'aircloak':
            sql = "show tables"
        try:
            cur.execute(sql)
        except psycopg2.Error as e:
            print(f"Error: getTableNames() query: '{e}'")
            self.cleanUp(cleanUpCache=False, doExit=True)
        ans = cur.fetchall()
        ret = []
        for row in ans:
            ret.append(row[0])
        conn.close()
        return ret

    def getUidColName(self):
        """ Returns the name of the UID column"""
        return self._p['uid']

    def getPriorKnowledge(self, dataColumns, method,
            fraction=None, count=None, selectColumn=None, colRange=[None,None], values=[None]):
        """ Returns data from the rawDB according to a specification

        This mimics external knowledge that an attacker may have about the data, and
        influences the 'knowledge' part of the GDA Score. <br/>
            `dataColumns` is a list of column names. The data for these columns is returned <br/>
            `method` can be 'rows' or 'users'. If 'rows', then rows are selected
            according to the criteria (`fraction`, `count`, `selectColumn`, `colRange`,
            or `values`).
            If 'users', then all rows for a set of selected users is returned.
            The users are selected according to the criteria (`fraction` or `count`) <br/>
            If none of the criteria are set, or if `fraction` is set to 1.0, then all
            rows are returned (for the selected column values) One of `fraction`, `count`,
            or `selectColumn` must be set. <br/>
            `fraction` or `count` are set to obtain a random set of rows or users. If
            `fraction`, then an approximate fraction of all rows/users is selected.
            `fraction` is a value between 0 and 1.0. If `count`, then exactly `count`
            random rows/users are selected. <br/>
            `selectColumn` is set to select rows according to the values of the specified
            column. `selectColumn` is a column name. If set, then either a range of
            values (`colRange`), or a set of values (`values`) must be chosen. <br/>
            `colRange` is
            a list with two values: `[min,max]`. This selects all values
            between min and max inclusive. <br/>
            `values` is a list
            of one or more values of any type. This selects all values matching those in
            the list. <br/>
            The return value is a list in this format: <br/>
                `[(C1,C2...,Cn),(C1,C2...,Cn), ... (C1,C2...,Cn)]` <br/>
            where C1 corresponds to the first column in `dataColumns`, C2 corresponds to
            the second column in `dataColumns`, and so on.  <br/>
        """
        # Check input parameters
        if not isinstance(dataColumns, list):
            print(f"getPriorKnowledge Error: dataColumns must be a list of one or more column names")
            self.cleanUp(cleanUpCache=False, doExit=True)
        if method not in ['rows','users']:
            print(f"getPriorKnowledge Error: method must be 'rows' or 'users'")
            self.cleanUp(cleanUpCache=False, doExit=True)
        if fraction is None and count is None and selectColumn is None:
            print(f"getPriorKnowledge Error: one of fraction, count, or selectColumn must be set")
            self.cleanUp(cleanUpCache=False, doExit=True)
        if fraction and not isinstance(fraction, float):
            print(f"getPriorKnowledge Error: if set, fraction must be a float")
            self.cleanUp(cleanUpCache=False, doExit=True)
        if (fraction and (count or selectColumn)) or (count and (fraction or selectColumn)):
            print(f"getPriorKnowledge Error: only one of fraction, count, or selectColumn may be set")
            self.cleanUp(cleanUpCache=False, doExit=True)
        if count and not isinstance(count, int):
            print(f"getPriorKnowledge Error: if set, count must be an integer")
            self.cleanUp(cleanUpCache=False, doExit=True)
        if selectColumn:
            if selectColumn not in self._colNames:
                print(f"getPriorKnowledge Error: selectColumn '{selectColumn}' is not a valid column")
                self.cleanUp(cleanUpCache=False, doExit=True)
            if colRange == [None,None] and values == [None]:
                print(f"getPriorKnowledge Error: if selectColumn is set, one of colRange or values must be set")
                self.cleanUp(cleanUpCache=False, doExit=True)
            if not isinstance(colRange, list):
                print(f"getPriorKnowledge Error: colRange must be a list with two values")
                self.cleanUp(cleanUpCache=False, doExit=True)
            if not (isinstance(values, list) or isinstance(values, tuple)) or len(values) == 0:
                print(f"getPriorKnowledge Error: values must be a list or tuple with one or more values")
                self.cleanUp(cleanUpCache=False, doExit=True)
        for col in dataColumns:
            if col not in self._colNames:
                print(f"getPriorKnowledge Error: column '{col}' is not a valid column")
                self.cleanUp(cleanUpCache=False, doExit=True)
        # Basic input checks finished
        # Establish connection to database
        db = getDatabaseInfo(self._p['rawDb'])
        connStr = str(
            f"host={db['host']} port={db['port']} dbname={db['dbname']} user={db['user']} password={db['password']}")
        conn = psycopg2.connect(connStr)
        cur = conn.cursor()
        table = self._p['table']
        uid = self._p['uid']
        # Make the SELECT part of the SQL query
        initSql = 'SELECT '
        for col in dataColumns:
            initSql += str(f"{col}, ")
        initSql = initSql[0:-2]
        if method == 'rows' and fraction:
            sql = initSql + str(f" FROM {table} WHERE random() <= {fraction}")
            ans = self._doQuery(cur,sql)
            self._atrs['base']['knowledgeCells'] += len(dataColumns) * len(ans)
            return(ans)
        if method == 'users' and fraction:
            sql = initSql + str(f" FROM {table} WHERE {uid} IN ")
            sql += str(f"(SELECT {uid} from (SELECT DISTINCT {uid} FROM {table}) t WHERE random() < {fraction})")
            ans = self._doQuery(cur,sql)
            self._atrs['base']['knowledgeCells'] += len(dataColumns) * len(ans)
            return(ans)
        if method == 'rows' and colRange[0] is not None:
            for pair in self._colNamesTypes:
                if selectColumn in pair[0]:
                    colType = pair[1]
                    break
            if 'text' in colType or 'char' in colType or 'date' in colType or 'time' in colType:
                sql = initSql + str(f" FROM {table} WHERE {selectColumn} >= '{colRange[0]}' and {selectColumn} <= '{colRange[1]}'")
            else:
                sql = initSql + str(f" FROM {table} WHERE {selectColumn} >= {colRange[0]} and {selectColumn} <= {colRange[1]}")
            ans = self._doQuery(cur,sql)
            self._atrs['base']['knowledgeCells'] += len(dataColumns) * len(ans)
            return(ans)
        if method == 'rows' and values[0] is not None:
            sql = initSql + str(f" FROM {table} WHERE {selectColumn} IN (")
            for pair in self._colNamesTypes:
                if selectColumn in pair[0]:
                    colType = pair[1]
                    break
            for value in values:
                if "text" in colType or "date" in colType or "time" in colType:
                    sql += str(f"'{value}', ")
                else:
                    sql += str(f"{value}, ")
            sql = sql[0:-2]
            sql += ")"
            ans = self._doQuery(cur,sql)
            self._atrs['base']['knowledgeCells'] += len(dataColumns) * len(ans)
            return(ans)
        if method == 'rows' and count:
            # need to know the total number of rows
            sql = str(f"select count(*) from {table}")
            ans = self._doQuery(cur,sql)
            numRows = ans[0][0]
            # next we get some random set of rows that is certainly more than we need
            frac = (count/numRows)*2
            sql = initSql + str(f" FROM {table} WHERE random() <= {frac}")
            temp = self._doQuery(cur,sql)
            # next we scramble these so that we get a random sampling from the random sampling
            random.shuffle(temp)
            # finally pick the exact count
            ans = temp[0:count]
            self._atrs['base']['knowledgeCells'] += len(dataColumns) * len(ans)
            return(ans)
        if method == 'users' and count:
            # get the full list of distinct UIDs
            sql = str(f"SELECT DISTINCT {uid} from {table}")
            uidList = self._doQuery(cur,sql)
            # next we scramble these so that we can get a random sampling
            random.shuffle(uidList)
            # pick the exact count of UIDs
            uidList = uidList[0:count]
            sql = initSql + str(f" FROM {table} WHERE {uid} IN (")
            for pair in self._colNamesTypes:
                if uid in pair[0]:
                    colType = pair[1]
                    break
            for uidVal in uidList:
                if "text" in colType or "date" in colType or "time" in colType:
                    sql += str(f"'{uidVal[0]}', ")
                else:
                    sql += str(f"{uidVal[0]}, ")
            sql = sql[0:-2]
            sql += ")"
            ans = self._doQuery(cur,sql)
            self._atrs['base']['knowledgeCells'] += len(dataColumns) * len(ans)
            return(ans)
        #zzzz
        return None
    #def getPriorKnowledge(self, dataColumns, method,
            #fraction=None, count=None, selectColumn=None, colRange=[None,None], values=[None]):

    # -------------- Private Methods -------------------
    def _doQuery(self,cur,sql):
        try:
            cur.execute(sql)
        except psycopg2.Error as e:
            print(f"Error: getPublicColValues() query: '{e}'")
            self.cleanUp(cleanUpCache=False, doExit=True)
        ans = cur.fetchall()
        return ans

    def _cleanPasswords(self):
        if 'attack' in self._atrs:
            if ('anonDb' in self._atrs['attack'] and
                    'password' in self._atrs['attack']['anonDb']):
                self._atrs['attack']['anonDb']['password'] = 'xxxxxxx'
            if ('rawDb' in self._atrs['attack'] and
                    'password' in self._atrs['attack']['rawDb']):
                self._atrs['attack']['rawDb']['password'] = 'xxxxxxx'
            if ('pubDb' in self._atrs['attack'] and
                    'password' in self._atrs['attack']['pubDb']):
                self._atrs['attack']['pubDb']['password'] = 'xxxxxxx'

        return

    def _assignGlobalParams(self, params):
        self._pp = pprint.PrettyPrinter(indent=4)
        for key, val in params.items():
            self._p[key] = val
            # assign verbose value to a smaller variable name
            if key == "verbose":
                if val != False:
                    self._vb = True
            # Check criteria
            if key == "criteria":
                if (val == 'singlingOut' or val == 'inference' or
                        val == 'linkability'):
                    self._cr = val
                else:
                    print("""Error: criteria must be one of 'singlingOut',
                             'inference', or 'linkability'""")
                    sys.exit('')

    def _setupLocalCacheDB(self):
        path = self._p['locCacheDir'] + "/" + self._p['name'] + ".db"
        conn = sqlite3.connect(path)
        cur = conn.cursor()
        if self._p['flushCache'] == True:
            sql = "DROP TABLE IF EXISTS tab"
            if self._vb: print(f"   cache DB: {sql}")
            cur.execute(sql)
        sql = """CREATE TABLE IF NOT EXISTS tab
                 (qid text primary key, answer text)"""
        if self._vb: print(f"   cache DB: {sql}")
        cur.execute(sql)
        # conn.commit()
        cur.execute("PRAGMA journal_mode=WAL;")
        # conn.commit()
        conn.close()

    def _removeLocalCacheDB(self):
        path = self._p['locCacheDir'] + "/" + self._p['name'] + ".db"
        max_attempts = 5
        attempt = 0
        removeFlag = False
        _ex = None
        if os.path.exists(path):
            while attempt <= max_attempts:
                attempt += 1
                try:
                    os.remove(path)
                    removeFlag = True
                    break
                except Exception as ex:
                    _ex = ex
                    removeFlag = False
                time.sleep(0.3)

            if not removeFlag:
                logging.error(f"cache db removing error after {attempt} attempts.\n"
                              f"ERROR: Failed to remove cache DB {path} => ex: {_ex}")
            else:
                logging.info(f"cache db removed successfully after {attempt} attempt(s).")

    def removeLocalCacheDBWrapper(self):
        return self._removeLocalCacheDB()

    def _setupThreadsAndQueues(self):
        self._anonThreads = []
        self._rawThreads = []
        self._pubThreads = []
        self._exploreQ = queue.Queue()
        self._knowledgeQ = queue.Queue()
        self._attackQ = queue.Queue()
        self._claimQ = queue.Queue()
        self._guessQ = queue.Queue()
        self._rawQ = queue.Queue()
        if self._cr == 'linkability':
            self._pubQ = queue.Queue()
        self._anonQ = queue.Queue()
        backQ = queue.Queue()
        for i in range(self._p['numRawDbThreads']):
            d = dict(db=self._p['rawDb'], q=self._rawQ,
                     kind='raw', backQ=backQ)
            t = EnhancedThread(target=self._dbWorker, kwargs=d)
            t.start()
            self._rawThreads.append(t)
        for i in range(self._p['numAnonDbThreads']):
            d = dict(db=self._p['anonDb'], q=self._anonQ,
                     kind='anon', backQ=backQ)
            t = EnhancedThread(target=self._dbWorker, kwargs=d)
            t.start()
            self._anonThreads.append(t)
        if self._cr == 'linkability':
            for i in range(self._p['numPubDbThreads']):
                d = dict(db=self._p['pubDb'], q=self._pubQ,
                         kind='pub', backQ=backQ)
                t = EnhancedThread(target=self._dbWorker, kwargs=d)
                t.start()
                self._pubThreads.append(t)
        num = (self._p['numRawDbThreads'] + self._p['numAnonDbThreads'])
        if self._cr == 'linkability':
            num += self._p['numPubDbThreads']
        # Make sure all the worker threads are ready
        for i in range(num):
            msg = backQ.get()
            if self._vb: print(f"{msg} is ready")
            backQ.task_done()

    def _dbWorker(self, db, q, kind, backQ):

        # uber dp has a different interface than aircloak or postgres
        if db['type'] == 'uber_dp':
            if self._vb: print(f"Starting {__name__}.serverWorker:{db, kind}")
            me = threading.current_thread()
            backQ.put(me)
            while True:
                jobOrig = q.get()
                q.task_done()
                if jobOrig is None:
                    if self._vb:
                        print(f"    {me}: serverWorker done {db, kind}")
                    break
                # make a copy for passing around
                job = copy.copy(jobOrig)
                replyQ = job['q']
                replies = []  # holds all the reply dicts
                for query in job['queries']:
                    reply = self._processUberQuery(query)
                    replies.append(reply)
                job['replies'] = replies
                replyQ.put(job)

        elif db['type'] == 'aircloak' or db['type'] == 'postgres':
            if self._vb: print(f"Starting {__name__}.dbWorker:{db, kind}")
            me = threading.current_thread()
            d = getDatabaseInfo(db)
            # Establish connection to database
            connStr = str(
                f"host={d['host']} port={d['port']} dbname={d['dbname']} user={d['user']} password={d['password']}")
            if self._vb: print(f"    {me}: Connect to DB with DSN '{connStr}'")
            conn = psycopg2.connect(connStr)
            cur = conn.cursor()
            # Establish connection to local cache
            path = self._p['locCacheDir'] + "/" + self._p['name'] + ".db"
            # Set timeout low so that we don't spend a lot of time inserting
            # into the cache in case it gets overloaded
            connInsert = sqlite3.connect(path, timeout=0.1)
            curInsert = connInsert.cursor()
            connRead = sqlite3.connect(path)
            curRead = connRead.cursor()
            backQ.put(me)
            while True:
                if isinstance(me, EnhancedThread) and me.stopped():
                    logging.info(f' > {me.getName()} stopped.')
                    return
                try:
                    jobOrig = q.get(block=True, timeout=3)
                except queue.Empty:
                    continue
                q.task_done()
                if jobOrig is None:
                    if self._vb: print(f"    {me}: dbWorker done {db, kind}")
                    conn.close()
                    connRead.close()
                    connInsert.close()
                    break
                # make a copy for passing around
                job = copy.copy(jobOrig)
                replyQ = job['q']
                replies = []
                for query in job['queries']:
                    reply = self._processQuery(query, conn, cur,
                                               connInsert, curInsert, curRead)
                    replies.append(reply)
                job['replies'] = replies
                replyQ.put(job)

    def _processUberQuery(self, query):
        # Once the session ID is defined, we stay in that session
        # ONLY `epsilon` and the `query` can be set
        # `budget` and `dbname` just have placeholders because they cannot be changed anyways
        request = {
            'query': query['sql'],
            'epsilon': str(query['epsilon']),
            'count' : '1', # the interface is designed in a way such that repeted attacks need to be triggered
            # by several askAttack(), getAttack(). Therefore, the server functionality to potentially execute the same
            # query several times is not used
            'budget': 'None',
            'dbname': 'None',
            'sid': self._sid
        }

        start = time.perf_counter()  # store the time of query execution

        url = self._p['anonDb']['host']
        headers = {'Content-Type': 'application/json',
                       'Accept': 'application/json'}  # Headers to be sent in the client request
        # Client stores the response sent by the simpleServer.py
        try:
            response = requests.get(url, json=request, headers=headers, timeout=100, verify=True)

            resp = response.json()  # Convert response sent by server to JSON
            if self._vb:
                print("Server response for the given query: ")
                print(resp)
            if 'Error' in resp['Server Response']:
                # If budget exceeded, we do not provide an answer field in the reply
                if 'Budget Exceeded' in resp['Server Response']['Error']:
                    print("This query does exceed the remaining privacy budget for your attack.")
                    print("Your remaining budget is "+str(self._remaining_dp_budget)+", the query would need "+str(query['epsilon'])+".")
                    reply = dict(error='Budget Exceeded')
            else:
                # if the query went through, we can deduct its privay consumption to keep track internally
                self._remaining_dp_budget -= query['epsilon']

                # the answer of dp queries is a single value (as it computes the aggregate over several query rows)
                # to match the format needed to compute number of cells, we still need two dimensions
                # therefore, [[]]
                ans = [[float((resp['Server Response']['Result']))]]# record the answer and append it as a 1-element list of float

                # for statistics. Only makes sense to count query if it went through
                self._op['numQueries'] += 1

                # after all for loops find the shape of the resulting answers
                numCells = self._computeNumCells(ans)

                # format the reply similarly as for aircloak and postgres
                reply = dict(answer=ans, cells=numCells)

        except requests.ConnectionError as e:
            print("Connection Error. Make sure you are connected to Internet.")
            print(str(e))

        except requests.Timeout as e:
            print("Timeout Error")
            print(str(e))

        except requests.RequestException as e:
            print("General Error")
            print(str(e))

        except KeyboardInterrupt:
            print("Program closed")


        reply['query'] = query

        # calculate the time we needed for the query
        end = time.perf_counter()
        duration = end - start

        self._op['timeQueries'] += duration

        return reply

    def _processQuery(self, query, conn, cur, connInsert, curInsert, curRead, queryType='db'):
        # record and remove the return queue
        # queryType specifies if we are asking the queries from a db (aircloak, postgres)
        # or from a server, like uber_dp
        if queryType == 'server':
            pass
        elif queryType == 'db':
            cache = query['cache']
            del query['cache']
            # Check the cache for the answer
            # Note that at this point query is a dict
            # containing the sql, the db (raw, anon, or pub),
            # and any tags that the source added
            cachedReply = None
            if cache:
                cachedReply = self._getCache(curRead, query)
            if cachedReply:
                if self._vb: print("    Answer from cache")
                if 'answer' in cachedReply:
                    numCells = self._computeNumCells(cachedReply['answer'])
                    cachedReply['cells'] = numCells
                return cachedReply
            else:
                start = time.perf_counter()
                try:
                    cur.execute(query['sql'])
                except psycopg2.Error as e:
                    reply = dict(error=e.pgerror)
                else:
                    ans = cur.fetchall()
                    numCells = self._computeNumCells(ans)
                    reply = dict(answer=ans, cells=numCells)
                end = time.perf_counter()
                duration = end - start
                self._op['numQueries'] += 1
                self._op['timeQueries'] += duration
                reply['query'] = query
                # only cache if the native query is slow
                if duration > 0.1:
                    # self._putCache(connInsert, curInsert, query, reply)
                    self.cacheQueue.put([connInsert, curInsert, query, reply])
            return reply

    def _checkInference(self, ans):
        # column 0 must be UID
        # User is inferred if all users in answer have same attributes
        # Returns 1 if inference claim correct, else returns 0
        if len(ans) == 0:
            return 0
        numRows = len(ans)
        numColumns = len(ans[0])
        if numColumns < 2:
            # Can't test inference unless there is at least one column
            # (other than UID) that is the same
            return 0
        for c in range(1, numColumns):
            val = ans[0][c]
            for r in range(1, numRows):
                if val != ans[r][c]:
                    return 0
        return 1

    def _checkLinkability(self, ans):
        # The test is the same as with singling out
        # Returns 1 if linkability claim correct, else returns 0
        return self._checkSinglingOut(ans)

    def _checkSinglingOut(self, ans):
        # column 0 must be UID
        # User is singled-out if there is only one distinct UID
        # Returns 1 if singling out claim correct, else returns 0
        if len(ans) == 0:
            return 0
        uids = {}
        for row in ans:
            uids[row[0]] = 1
        numUids = len(uids)
        if numUids == 1:
            return 1
        else:
            return 0

    def _computeNumCells(self, ans):
        # ans is a list of tuples [(x,y),(x,y),(x,y) ...
        # Count the number of columns (in the first row)
        if len(ans) == 0:
            return 0
        numColumns = len(ans[0])
        numRows = len(ans)
        numCells = numColumns * numRows
        return numCells

    def _doParamChecks(self):
        dbInfoRaw = getDatabaseInfo(self._p['rawDb'])
        if not dbInfoRaw:
            sys.exit('rawDb now found in database config')
        if len(self._p['anonDb']) == 0:
            self._p['anonDb'] = self._p['rawDb']
        else:
            dbInfoAnon = getDatabaseInfo(self._p['anonDb'])
            if not dbInfoAnon:
                sys.exit('anonDb not found in database config')
        if self._cr == 'linkability':
            dbInfo = getDatabaseInfo(self._p['pubDb'])
            if not dbInfo:
                sys.exit('Must specify pubDb if criteria is linkability')
        numThreads = self._p['numRawDbThreads'] + self._p['numAnonDbThreads']
        if self._cr == 'linkability':
            numThreads += self._p['numPubDbThreads']
        if numThreads > 50:
            sys.exit("Error: Can't have more than 50 threads total")

    def _getCache(self, cur, query):
        path = self._p['locCacheDir'] + "/" + self._p['name'] + ".db"
        my_conn = sqlite3.connect(path, timeout=0.1)
        my_cur = my_conn.cursor()
        # turn the query (dict) into a string
        qStr = self._dict2Str(query)
        if qStr is None:
            return None
        sql = str(f"SELECT answer FROM tab where qid = '{qStr}'")
        if self._vb: print(f"   cache DB: {sql}")
        start = time.perf_counter()
        for z in range(1,11):
            try:
                # cur.execute(sql)
                my_cur.execute(sql)
            except sqlite3.OperationalError as e:
                # database is locked
                if self._p['verbose'] or self._vb:
                    logging.warning(f'>> reading from cache DB: {z} attempt(s). Coming next try '
                                  f'soon...')
                err = e
                time.sleep(0.5)
                continue
            except (sqlite3.Error, Exception) as e:
                if self._p['verbose'] or self._vb:
                    logging.warning(f"getCache error '{e.args[0]}' attempt: {z}. Coming next try "
                                    f"soon...")
                err = e
                time.sleep(0.5)
                continue
            else:
                break
        else:
            if self._p['verbose'] or self._vb:
                logging.error(f'>> could not read from cache DB >> ERROR: {err}')
            return None
        end = time.perf_counter()
        self._op['numCacheGets'] += 1
        self._op['timeCacheGets'] += (end - start)
        # answer = cur.fetchone() # frzmohammadali just to remember my stupidest bug ever
        answer = my_cur.fetchone()
        my_cur.close()
        my_conn.close()
        if not answer:
            return None
        rtnDict = self._str2Dict(answer[0])
        return rtnDict

    def _putCache(self, conn, cur, query, reply):
        # turn the query and reply (dict) into a string
        # Establish connection to local cache
        path = self._p['locCacheDir'] + "/" + self._p['name'] + ".db"
        qStr = self._dict2Str(query)
        if qStr is None:
            return
        rStr = self._dict2Str(reply)
        if rStr is None:
            return
        sql = str(f"INSERT INTO tab VALUES ('{qStr}','{rStr}')")
        if self._vb: print(f"   cache DB: {sql}")
        start = time.perf_counter()
        err = None
        for z in range(10):
            try:
                # cur.execute(sql)
                # conn.commit()
                my_conn = sqlite3.connect(path, timeout=0.1)
                my_cur = my_conn.cursor()
                my_cur.execute(sql)
                my_conn.commit()

            except sqlite3.IntegrityError as e:
                if self._p['verbose'] or self._vb:
                    logging.warning(f"putCache error [qid exists in cached queries] '{e.args[0]}' ")
                break
            except sqlite3.OperationalError as e:
                # database is locked
                if self._p['verbose'] or self._vb:
                    logging.warning(f"putCache attempt: {z}. Coming next try "
                                    f"soon...")
                err = e
                time.sleep(0.5)
                continue
            except (sqlite3.Error, Exception) as e:
                if self._p['verbose'] or self._vb:
                    logging.warning(f"putCache error '{e.args[0]}' attempt: {z}. Coming next try "
                                    f"soon...")
                err = e
                time.sleep(0.5)
                continue
            else:
                break
            finally:
                try:
                    if my_cur:
                        my_cur.close()
                    if my_conn:
                        my_conn.close()
                except sqlite3.ProgrammingError:
                    # cursor and connection is already closed
                    pass
        else:
            # raise err
            if self._p['verbose'] or self._vb:
                logging.error(f'>> could not insert into cache DB >> ERROR: {err}')

        end = time.perf_counter()
        self._op['numCachePuts'] += 1
        self._op['timeCachePuts'] += (end - start)

    def putCacheWrapper(self, conn, cur, query, reply):
        self._putCache(conn, cur, query, reply)

    def _dict2Str(self, d):
        try:
            dStr = simplejson.dumps(d)
        except TypeError:
            print("simpleJson failed")
            return None
        dByte = str.encode(dStr)
        dByte64 = base64.b64encode(dByte)
        try:
            dByte64Str = str(dByte64, "utf-8")
        except MemoryError:
            print("str(dByte64) failed")
            return None
        return dByte64Str

    def _str2Dict(self, dByte64Str):
        dByte64 = str.encode(dByte64Str)
        dByte = base64.b64decode(dByte64)
        dStr = str(dByte, "utf-8")
        d = simplejson.loads(dStr)
        return d

    def _makeSqlFromSpec(self, spec):
        sql = "select "
        if 'known' in spec:
            numKnown = len(spec['known'])
        else:
            numKnown = 0
        if 'guess' in spec:
            numGuess = len(spec['guess'])
        else:
            numGuess = 0
        if self._cr == 'inference':
            sql += str(f"{self._p['uid']}, ")
            for i in range(numGuess):
                sql += str(f"{spec['guess'][i]['col']}")
                if i == (numGuess - 1):
                    sql += " "
                else:
                    sql += ", "
            sql += str(f"from {self._p['table']} ")
            if numKnown:
                sql += "where "
            for i in range(numKnown):
                sql += str(f"{spec['known'][i]['col']} = ")
                sql += str(f"'{spec['known'][i]['val']}' ")
                if i == (numKnown - 1):
                    sql += " "
                else:
                    sql += "and "
        elif self._cr == 'singlingOut' or self._cr == 'linkability':
            sql += str(f"{self._p['uid']} from {self._p['table']} where ")
            for i in range(numKnown):
                sql += str(f"{spec['known'][i]['col']} = ")
                sql += str(f"'{spec['known'][i]['val']}' and ")
            for i in range(numGuess):
                sql += str(f"{spec['guess'][i]['col']} = ")
                sql += str(f"'{spec['guess'][i]['val']}' ")
                if i == (numGuess - 1):
                    sql += " "
                else:
                    sql += "and "
        return sql

    def _makeSqlConfFromSpec(self, spec):
        sqls = []
        numGuess = len(spec['guess'])
        if self._cr == 'inference' or self._cr == 'singlingOut':
            sql = str(f"select count(distinct {self._p['uid']}) from {self._p['table']} where ")
            # This first sql learns the number of rows matching the
            # guessed values
            for i in range(numGuess):
                sql += str(f"{spec['guess'][i]['col']} = ")
                sql += str(f"'{spec['guess'][i]['val']}'")
                if i != (numGuess - 1):
                    sql += " and "
            sqls.append(sql)
            # This second sql learns the total number of rows (should
            # normally be a cached result)
            sql = str(f"select count(distinct {self._p['uid']}) from {self._p['table']}")
            sqls.append(sql)
        elif self._cr == 'linkability':
            # nothing happens for linkability
            pass
        return sqls

    def _addToAtkRes(self, label, spec, val):
        """Adds the value to each column in the guess"""
        for tup in spec['guess']:
            col = tup['col']
            if col not in self._atrs['col']:
                print(f"Error: addToAtkRes(): Bad column in spec: '{col}'")
                self.cleanUp(cleanUpCache=False, doExit=True)
            if label not in self._atrs['col'][col]:
                print(f"Error: addToAtkRes(): Bad label '{label}'")
                self.cleanUp(cleanUpCache=False, doExit=True)
            self._atrs['col'][col][label] += val

    def _initAtkRes(self):
        self._atrs = {}
        self._atrs['attack'] = {}
        self._atrs['base'] = {}
        self._atrs['tableStats'] = {}
        self._atrs['col'] = {}
        # ----- Attack parameters
        self._atrs['attack']['attackName'] = self._p['name']
        self._atrs['attack']['rawDb'] = self._p['rawDb']
        self._atrs['attack']['anonDb'] = self._p['anonDb']
        if self._cr == 'linkability':
            self._atrs['attack']['pubDb'] = self._p['anonDb']
        self._atrs['attack']['criteria'] = self._p['criteria']
        self._atrs['attack']['table'] = self._p['table']
        # add parameters for the database machine itself
        db = getDatabaseInfo(self._p['rawDb'])
        self._atrs['attack']['rawHost'] = db['host']
        self._atrs['attack']['rawDbName'] = db['dbname']
        self._atrs['attack']['rawPort'] = db['port']
        if self._cr == 'linkability':
            db = getDatabaseInfo(self._p['pubDb'])
            self._atrs['attack']['pubHost'] = db['host']
            self._atrs['attack']['pubDbName'] = db['dbname']
            self._atrs['attack']['pubPort'] = db['port']
        db = getDatabaseInfo(self._p['anonDb'])
        self._atrs['attack']['anonHost'] = db['host']
        self._atrs['attack']['anonDbName'] = db['dbname']
        self._atrs['attack']['anonPort'] = db['port']
        # and a timestamp
        self._atrs['attack']['startTime'] = str(datetime.datetime.now())
        # ----- Params for computing knowledge:
        # number of prior knowledge cells requested
        self._atrs['base']['knowledgeCells'] = 0
        # number of times knowledge was queried
        self._atrs['base']['knowledgeGets'] = 0

        # ----- Params for computing how much work needed to attack:
        # number of attack cells requested
        self._atrs['base']['attackCells'] = 0
        # number of times attack was queried
        self._atrs['base']['attackGets'] = 0
        self._atrs['tableStats']['colNamesAndTypes'] = self._colNamesTypes
        self._atrs['tableStats']['numColumns'] = len(self._colNamesTypes)
        for tup in self._colNamesTypes:
            col = tup[0]
            if self._vb: print(f"initAtkRes() init column '{col}'")
            self._atrs['col'][col] = {}

            # ----- Params for computing claim success rate:
            # total possible number of claims
            self._atrs['col'][col]['claimTrials'] = 0
            # actual number of claims
            self._atrs['col'][col]['claimMade'] = 0
            # number of correct claims
            self._atrs['col'][col]['claimCorrect'] = 0
            # number of claims that produced bad SQL answer
            self._atrs['col'][col]['claimError'] = 0
            # claims where the attacker chose to pass (not make a claim),
            # but where the claim would have been correct
            self._atrs['col'][col]['claimPassCorrect'] = 0

            # ----- Params for computing confidence:
            # sum of all known count to full count ratios
            self._atrs['col'][col]['sumConfidenceRatios'] = 0
            # number of such ratios
            self._atrs['col'][col]['numConfidenceRatios'] = 0
            # average confidence ratio (division of above two params)
            self._atrs['col'][col]['avgConfidenceRatios'] = 0

    def _initOp(self):
        self._op['numQueries'] = 0
        self._op['timeQueries'] = 0
        self._op['numCachePuts'] = 0
        self._op['timeCachePuts'] = 0
        self._op['numCacheGets'] = 0
        self._op['timeCacheGets'] = 0

    def _initCounters(self):
        self._exploreCounter = 0
        self._knowledgeCounter = 0
        self._attackCounter = 0
        self._claimCounter = 0
        self._guessCounter = 0

    def _initUberDPSession(self):
        # Client establishes a session
        session = requests.Session()
        session.get_orig, session.get = session.get, functools.partial(session.get, timeout=20)

        # remember the session to close it if necessary
        self._session = session

        # function to initialize the session with the dp server
        try:
            # this is the initial query.
            # its only purpose is to obtain a session ID and to define a budget
            # The budget is set in the initial request only
            # Once the budget is set, no further modification to the budget
            # is possible in subsequent requests
            request = {
                'query': "",  # empty query, just serves to get a session ID
                'epsilon': '0.0',  # nothing used up in the initialization phase
                'budget': str(self._remaining_dp_budget),  # the numeric values are sent as strings
                'dbname': self._p['rawDb']['dbname'], # name of the raw db
                'sid': ''  # When sid is Null it indicates start of a session
                }
            # the database for anonymization here is uber
            url = self._p['anonDb']['host']
            headers = {'Content-Type': 'application/json',
                       'Accept': 'application/json'}


            # Client stores the response sent by the simpleServer.py
            response = requests.get(url, json=request, headers=headers, timeout=20, verify=True)
            resp = response.json()  # Convert response sent by server to JSON

            if 'Error' in resp['Server Response']:
                pprint.pprint(resp)  # Client prints the data returned by the server
            else:   # if no error was encountered
                if self._vb:
                    pprint.pprint("Setting up connection with Uber_DP Server")
                    pprint.pprint(resp)  # Client prints the data returned by the server

                # in case there is no error, but we are at the "dummy query" to get the session ID
                self._sid = resp['Server Response']['Session ID']  # Set Session ID to value returned by server


        except requests.ConnectionError as e:
            print("Connection Error. Make sure you are connected to Internet.")
            print(str(e))

        except requests.Timeout as e:
            print("Timeout Error")
            print(str(e))

        except requests.RequestException as e:
            print("General Error")
            print(str(e))

        except KeyboardInterrupt:
            print("Program closed")

Methods

def askAttack(self, query, cache=True)

Generate and queue up an attack query for database.

query is a dictionary with (currently) one value:
query['sql'] contains the SQL query.
query['epsilon'] is optional, and defines how much of the differential privacy budget is used for uber_dp

Expand source code

def askAttack(self, query, cache=True):
    """ Generate and queue up an attack query for database.

        `query` is a dictionary with (currently) one value: <br/>
        `query['sql']` contains the SQL query. <br/>
        `query['epsilon']` is optional, and defines how much of the differential privacy budget is used for uber_dp <br/>
    """
    self._attackCounter += 1
    if self._vb: print(f"Calling {__name__}.askAttack with query '{query}', count {self._attackCounter}")
    # Make a copy of the query for passing around
    qCopy = copy.copy(query)
    job = {}
    job['q'] = self._attackQ
    qCopy['cache'] = cache
    job['queries'] = [qCopy]
    self._anonQ.put(job)

def askClaim(self, spec, cache=True, claim=True)

Generate Claim query for raw and optionally pub databases.

Before anything happens, the system uses the gdaAttack.isClaimed() method to determine whether a previous claim fully or partially matches the new claim. Such duplicates are not allowed and an error will be raised providing additional details about the duplicate.

Making a claim results in a query to the raw database, and if linkability attack, the pub database, to check the correctness of the claim. Multiple calls to this method will cause the corresponding queries to be queued up, so askClaim() returns immediately. getClaim() harvests one claim result.
Set claim=False if this claim should not be applied to the confidence improvement score. In this case, the probability score will instead be reduced accordingly.
The spec is formatted as follows:

{'known':[{'col':'colName','val':'value'},...],
  'guess':[{'col':'colName','val':'value'},...],
}

spec['known'] are the columns and values the attacker already knows (i.e. with prior knowledge). Optional.
spec['guess'] are the columns and values the attacker doesn't know, but rather is trying to predict. Mandatory for 'singling out' and 'inference'. Optional for 'linkabiblity'
Answers are cached
Returns immediately

Expand source code

def askClaim(self, spec, cache=True, claim=True):
    """Generate Claim query for raw and optionally pub databases.

    Before anything happens, the system uses the `gdaAttack.isClaimed`
    method to determine whether a previous claim fully or partially
    matches the new claim. Such duplicates are not allowed and an error
    will be raised providing additional details about the duplicate.

    Making a claim results in a query to the raw database, and if
    linkability attack, the pub database, to check
    the correctness of the claim. Multiple calls to this method will
    cause the corresponding queries to be queued up, so `askClaim()`
    returns immediately. `getClaim()` harvests one claim result. <br/>
    Set `claim=False` if this claim should not be applied to the
    confidence improvement score. In this case, the probability score
    will instead be reduced accordingly. <br/>
    The `spec` is formatted as follows: <br/>

        {'known':[{'col':'colName','val':'value'},...],
          'guess':[{'col':'colName','val':'value'},...],
        }

    `spec['known']` are the columns and values the attacker already knows
    (i.e. with prior knowledge). Optional. <br/>
    `spec['guess']` are the columns and values the attacker doesn't know,
    but rather is trying to predict. Mandatory for 'singling out'
    and 'inference'. Optional for 'linkabiblity' <br/>
    Answers are cached <br/>
    Returns immediately"""
    if self._vb: print(f"Calling {__name__}.askClaim with spec '{spec}', count {self._claimCounter}")
    if not self._dupCheck.is_claimed(spec, verbose=self._vb, raise_true=True):
        self._dupCheck.claim(spec, verbose=self._vb)
    self._claimCounter += 1
    sql = self._makeSqlFromSpec(spec)
    if self._vb: print(f"Sql is '{sql}'")
    sqlConfs = self._makeSqlConfFromSpec(spec)
    if self._vb: print(f"SqlConf is '{sqlConfs}'")
    # Make a copy of the query for passing around
    job = {}
    job['q'] = self._claimQ
    job['claim'] = claim
    job['queries'] = [{'sql': sql, 'cache': cache}]
    job['spec'] = spec
    for sqlConf in sqlConfs:
        job['queries'].append({'sql': sqlConf, 'cache': cache})
    self._rawQ.put(job)

def askExplore(self, query, cache=True)

Generate and queue up an exploritory query for database

No score book-keeping is done here. An analyst may make any number of queries without impacting the GDA score.
query is a dictionary with two values:
query['sql'] contains the SQL query.
query['db'] determines which database is queried, and is one of 'rawDb', 'anonDb', or (if linkability), 'pubDb'.

Expand source code

def askExplore(self, query, cache=True):
    """ Generate and queue up an exploritory query for database

        No score book-keeping is done here. An analyst may make
        any number of queries without impacting the GDA score. <br/>
        `query` is a dictionary with two values: <br/>
        `query['sql']` contains the SQL query. <br/>
        `query['db']` determines which database is queried, and
        is one of 'rawDb', 'anonDb', or (if linkability), 'pubDb'."""

    self._exploreCounter += 1
    if self._vb: print(f"Calling {__name__}.askExplore with "
                       f"query '{query}', count {self._exploreCounter}")
    # Make a copy of the query for passing around
    qCopy = copy.copy(query)
    job = {}
    job['q'] = self._exploreQ
    qCopy['cache'] = cache
    job['queries'] = [qCopy]
    if qCopy['db'] == 'rawDb' or qCopy['db'] == 'raw':
        self._rawQ.put(job)
    elif qCopy['db'] == 'anonDb' or qCopy['db'] == 'anon':
        self._anonQ.put(job)
    else:
        self._pubQ.put(job)

def askKnowledge(self, query, cache=True)

Generate and queue up a prior knowledge query for database

The class keeps track of how many prior knowledge cells were returned and uses this to compute a score.
Input parameters formatted the same as with askAttack()

Expand source code

def askKnowledge(self, query, cache=True):
    """ Generate and queue up a prior knowledge query for database

        The class keeps track of how many prior knowledge cells were
        returned and uses this to compute a score. <br/>
        Input parameters formatted the same as with `askAttack()`"""

    self._knowledgeCounter += 1
    if self._vb: print(f"Calling {__name__}.askKnowledge with query "
                       f"'{query}', count {self._knowledgeCounter}")
    # Make a copy of the query for passing around
    qCopy = copy.copy(query)
    job = {}
    job['q'] = self._knowledgeQ
    qCopy['cache'] = cache
    job['queries'] = [qCopy]
    self._rawQ.put(job)

def cleanUp(self, cleanUpCache=True, doExit=False, exitMsg='Finished cleanUp, exiting')

Garbage collect queues, threads, and cache.

By default, this wipes the cache. The idea being that if the entire attack finished successfully, then it won't be repeated and the cache isn't needed. Do cleanUpCache=False if that isn't what you want.

Expand source code

def cleanUp(self, cleanUpCache=True, doExit=False,
            exitMsg="Finished cleanUp, exiting"):
    """ Garbage collect queues, threads, and cache.

        By default, this wipes the cache. The idea being that if the
        entire attack finished successfully, then it won't be
        repeated and the cache isn't needed. Do `cleanUpCache=False`
        if that isn't what you want."""
    if self._vb: print(f"Calling {__name__}.cleanUp")
    if self._rawQ.empty() != True:
        logging.warning("Warning, trying to clean up when raw queue not empty!")
    if self._anonQ.empty() != True:
        logging.warning("Warning, trying to clean up when anon queue not empty!")
    if self.cacheQueue.empty() != True:
        logging.warning("Warning, trying to clean up when cache queue not empty!")
    # Stuff in end signals for the workers (this is a bit bogus, cause
    # if a thread is gone or hanging, not all signals will get read)
    for i in range(self._p['numRawDbThreads']):
        self._rawQ.put(None)
    for i in range(self._p['numAnonDbThreads']):
        self._anonQ.put(None)

    for i in range(self.cacheQueue.qsize()):
        self.cacheQueue.put(None)

    cleanBgThreads()

    if len(self._p['pubDb']) > 0:
        if self._pubQ.empty() != True:
            print("Warning, trying to clean up when pub queue not empty!")
        for i in range(self._p['numPubDbThreads']):
            self._pubQ.put(None)
        for t in self._pubThreads:
            if t.isAlive(): t.stop() # t.join()
    if cleanUpCache:
        self._removeLocalCacheDB()
    if self._session: # close the uber session
        self._session.close()
    if doExit:
        sys.exit(exitMsg)

def getAnonTableCharacteristics(self, tableName='')

Returns the full contents of the table characteristics

Return value is a dict indexed by column name:

{ '<colName>':
    {
        'av_rows_per_vals': 3.93149,
        'av_uids_per_val': 0.468698,
        'column_label': 'continuous',
        'column_name': 'dropoff_latitude',
        'column_type': 'real',
        'max': '898.29382000000000',
        'min': '-0.56333297000000',
        'num_distinct_vals': 24216,
        'num_rows': 95205,
        'num_uids': 11350,
        'std_rows_per_val': 10.8547,
        'std_uids_per_val': 4.09688},
    }
}

Expand source code

def getAnonTableCharacteristics(self, tableName=''):
    """Returns the full contents of the table characteristics

       Return value is a dict indexed by column name: <br/>

           { '<colName>':
               {
                   'av_rows_per_vals': 3.93149,
                   'av_uids_per_val': 0.468698,
                   'column_label': 'continuous',
                   'column_name': 'dropoff_latitude',
                   'column_type': 'real',
                   'max': '898.29382000000000',
                   'min': '-0.56333297000000',
                   'num_distinct_vals': 24216,
                   'num_rows': 95205,
                   'num_uids': 11350,
                   'std_rows_per_val': 10.8547,
                   'std_uids_per_val': 4.09688},
               }
           }

    """
    if len(tableName) == 0:
        # caller didn't supply a table name, so get it from the
        # class init
        tableName = self._p['table']

    # Modify table name to the default for the characteristics table
    # tableName += '_char'

    # Establish connection to database
    db = getDatabaseInfo(self._p['anonDb'])
    connStr = str(
        f"host={db['host']} port={db['port']} dbname={db['dbname']} user={db['user']} password={db['password']}")
    conn = psycopg2.connect(connStr)
    cur = conn.cursor()

    # Query it for column names
    sql = str(f"""select column_name, data_type 
              from information_schema.columns
              where table_schema NOT IN ('information_schema', 'pg_catalog') and 
              table_name='{tableName}'""")
    try:
        cur.execute(sql)
    except psycopg2.Error as e:
        print(f"Error: getAnonTableCharacteristics() query: '{e}'")
        self.cleanUp(cleanUpCache=False, doExit=True)
    ans = cur.fetchall()

    # Set up return dict
    ret = {_row[0]: {'column_name': _row[0], 'column_type': _row[1]} for _row in ans}

    conn.close()
    return ret

def getAttack(self)

Returns the result of one askAttack() call

Blocks until the result is available. Note that the order in which results are received is not necesarily the order in which askAttack() calls were made.
Assuming result is returned:
result['answer'] is the answer returned by the DB. The format is:
[(C1,C2...,Cn),(C1,C2...,Cn), ... (C1,C2...,Cn)]
where C1 is the first element of the SELECT, C2 the second element, etc. This attribute does not exist in cases of query error (i.e. bad sql, budget exceeded if uber_dp, etc.)
result['cells'] is the number of cells returned in the answer (used by gdaAttack to compute total attack cells)
result['query']['sql'] is the query from the corresponding askAttack(). result['error'] contains the error description
result['remaining_dp_budget'] contains the remaining differential privacy budget when uber_dp is used.

Expand source code

def getAttack(self):
    """ Returns the result of one askAttack() call

        Blocks until the result is available. Note that the order
        in which results are received is not necesarily the order
        in which `askAttack()` calls were made. <br/>
        Assuming `result` is returned: <br/>
        `result['answer']` is the answer returned by the DB. The
        format is: <br/>
            `[(C1,C2...,Cn),(C1,C2...,Cn), ... (C1,C2...,Cn)]` <br/>
        where C1 is the first element of the `SELECT`, C2 the second
        element, etc. This attribute does not exist in cases of query
        error (i.e. bad sql, budget exceeded if uber_dp, etc.) <br/>
        `result['cells']` is the number of cells returned in the answer
        (used by `gdaAttack()` to compute total attack cells) <br/>
        `result['query']['sql']` is the query from the corresponding
        `askAttack()`.
        `result['error']` contains the error description <br/>
        `result['remaining_dp_budget']` contains the remaining differential
        privacy budget when uber_dp is used. <br/>
        """

    if self._vb:
        print(f"Calling {__name__}.getAttack")
    if self._attackCounter == 0:
        # Caller shouldn't be calling if there are no expected
        # answers, but is anyway, so just return
        return {'query': {'sql': 'None'}, 'error': 'Nothing to do',
                'stillToCome': 0}
    job = self._attackQ.get()
    self._attackQ.task_done()
    self._attackCounter -= 1
    reply = job['replies'][0]
    reply['stillToCome'] = self._attackCounter
    self._atrs['base']['attackGets'] += 1
    if 'cells' in reply:
        if reply['cells'] == 0:
            self._atrs['base']['attackCells'] += 1
        else:
            self._atrs['base']['attackCells'] += reply['cells']
    else:
        self._atrs['base']['attackCells'] += 1

    if self._type == 'uber_dp':
        reply['remaining_dp_budget'] = self._remaining_dp_budget
    return (reply)

def getAttackTableName(self)

Returns the name of the table being used in the attack.

Expand source code

def getAttackTableName(self):
    """Returns the name of the table being used in the attack."""
    return self._p['table']

def getClaim(self)

Wait for and gather results of askClaim() calls

Returns a data structure that contains both the result of one finished claim, and the claim's input parameters. Note that the order in which results are returned by getClaim() are not necessarily the same order they were inserted by askClaim().
Assuming result is returned:
result['claim'] is the value supplied in the corresponding askClaim() call
result['spec'] is a copy of the spec supplied in the corresponding askClaim() call.
result['queries'] is a list of the queries generated in order to validate the claim.
result['answers'] are the answers to the queries in result['queries'].
result['claimResult'] is 'Correct' or 'Incorrect', depending on whether the claim satisfies the critieria or not.
result['stillToCome'] is a counter showing how many more claims are still queued. When stillToCome is 0, then all claims submitted by askClaim() have been returned.

Expand source code

def getClaim(self):
    """ Wait for and gather results of askClaim() calls

        Returns a data structure that contains both the result
        of one finished claim, and the claim's input parameters.
        Note that the order in which results are returned by
        `getClaim()` are not necessarily the same order they were
        inserted by `askClaim()`. <br/>
        Assuming `result` is returned: <br/>
        `result['claim']` is the value supplied in the corresponding
        `askClaim()` call <br/>
        `result['spec']` is a copy of the `spec` supplied in the
        corresponding `askClaim()` call. <br/>
        `result['queries']` is a list of the queries generated in order to
        validate the claim. <br/>
        `result['answers']` are the answers to the queries in
        `result['queries']`. <br/>
        `result['claimResult']` is 'Correct' or 'Incorrect', depending
        on whether the claim satisfies the critieria or not. <br/>
        `result['stillToCome']` is a counter showing how many more
        claims are still queued. When `stillToCome` is 0, then all
        claims submitted by `askClaim()` have been returned."""

    if self._vb:
        print(f"Calling {__name__}.getClaim")
    if self._claimCounter == 0:
        # Caller shouldn't be calling if there are no expected
        # answers, but is anyway, so just return
        return {'query': {'sql': 'None'}, 'error': 'Nothing to do',
                'stillToCome': 0, 'claimResult': 'Error'}
    job = self._claimQ.get()
    claim = job['claim']
    self._claimQ.task_done()
    self._claimCounter -= 1
    job['stillToCome'] = self._claimCounter
    self._addToAtkRes('claimTrials', job['spec'], 1)
    # The claim is tested against the first reply
    reply = job['replies'][0]
    job['claimResult'] = 'Wrong'
    if claim:
        self._addToAtkRes('claimMade', job['spec'], 1)
    if 'error' in reply:
        self._addToAtkRes('claimError', job['spec'], 1)
        job['claimResult'] = 'Error'
    else:
        if self._cr == 'singlingOut':
            claimIsCorrect = self._checkSinglingOut(reply['answer'])
        elif self._cr == 'inference':
            claimIsCorrect = self._checkInference(reply['answer'])
        elif self._cr == 'linkability':
            claimIsCorrect = self._checkLinkability(reply['answer'])
        if claim == 1 and claimIsCorrect:
            self._addToAtkRes('claimCorrect', job['spec'], 1)
            job['claimResult'] = 'Correct'
        elif claim == 0 and claimIsCorrect:
            self._addToAtkRes('claimPassCorrect', job['spec'], 1)
            job['claimResult'] = 'Correct'
    if self._cr == 'singlingOut' or self._cr == 'inference':
        # Then measure confidence against the second and third replies
        if 'answer' in job['replies'][1]:
            if job['replies'][1]['answer']:
                guessedRows = job['replies'][1]['answer'][0][0]
            else:
                guessedRows = 0
        elif 'error' in job['replies'][1]:
            self._pp.pprint(job)
            print(f"Error: conf query:\n{job['replies'][1]['error']}")
            self.cleanUp(cleanUpCache=False, doExit=True)
        if 'answer' in job['replies'][2]:
            if job['replies'][2]['answer']:
                totalRows = job['replies'][2]['answer'][0][0]
            else:
                totalRows = 0
        elif 'error' in job['replies'][2]:
            self._pp.pprint(job)
            print(f"Error: conf query:\n{job['replies'][2]['error']}")
            self.cleanUp(cleanUpCache=False, doExit=True)
        if totalRows:
            self._addToAtkRes('sumConfidenceRatios', job['spec'],
                              guessedRows / totalRows)
            self._addToAtkRes('numConfidenceRatios', job['spec'], 1)
            self._atrs['tableStats']['totalRows'] = totalRows
    else:
        # For linkability, the confidence is always 1/2
        self._addToAtkRes('sumConfidenceRatios', job['spec'], 0.5)
        self._addToAtkRes('numConfidenceRatios', job['spec'], 1)
    if 'q' in job:
        del job['q']
    return (job)

def getColNames(self, dbType='rawDb', tableName='')

Return simple list of column names

dbType is one of 'rawDb' or 'anonDb'

Expand source code

def getColNames(self, dbType='rawDb', tableName=''):
    """Return simple list of column names

    `dbType` is one of 'rawDb' or 'anonDb'"""

    if len(tableName) == 0:
        colsAndTypes = self.getColNamesAndTypes(dbType=dbType)
    else:
        colsAndTypes = self.getColNamesAndTypes(
            dbType=dbType, tableName=tableName)
    if not colsAndTypes:
        return None
    cols = []
    for tup in colsAndTypes:
        cols.append(tup[0])
    return cols

def getColNamesAndTypes(self, dbType='rawDb', tableName='')

Return raw database column names and types (or None if error)

dbType is one of 'rawDb' or 'anonDb'
return format: [(col,type),(col,type),…]

Expand source code

def getColNamesAndTypes(self, dbType='rawDb', tableName=''):
    """Return raw database column names and types (or None if error)

    dbType is one of 'rawDb' or 'anonDb' <br/>
    return format: [(col,type),(col,type),...]"""
    if len(tableName) == 0:
        # caller didn't supply a table name, so get it from the
        # class init
        tableName = self._p['table']

    # Establish connection to database
    db = getDatabaseInfo(self._p[dbType])
    if db['type'] != 'postgres' and db['type'] != 'aircloak':
        print(f"DB type '{db['type']}' must be 'postgres' or 'aircloak'")
        return None
    connStr = str(
        f"host={db['host']} port={db['port']} dbname={db['dbname']} user={db['user']} password={db['password']}")
    conn = psycopg2.connect(connStr)
    cur = conn.cursor()
    # Query it for column names
    if db['type'] == 'postgres':
        sql = str(f"""select column_name, data_type 
                  from information_schema.columns where
                  table_name='{tableName}'""")
    elif db['type'] == 'aircloak':
        sql = str(f"show columns from {tableName}")
    try:
        cur.execute(sql)
    except psycopg2.Error as e:
        print(f"Error: getColNamesAndTypes() query: '{e}'")
        self.cleanUp(cleanUpCache=False, doExit=True)
    ans = cur.fetchall()
    ret = []
    for row in ans:
        ret.append((row[0], row[1]))
    conn.close()
    return ret

def getExplore(self)

Wait for and gather results of prior askExplore() calls.

Blocks until the result is available. Note that the order in which results are received is not necesarily the order in which askExplore() calls were made.
Return parameter formatted the same as with getAttack()

Expand source code

def getExplore(self):
    """ Wait for and gather results of prior askExplore() calls.

        Blocks until the result is available. Note that the order
        in which results are received is not necesarily the order
        in which `askExplore()` calls were made. <br/>
        Return parameter formatted the same as with `getAttack()`"""
    if self._vb:
        print(f"Calling {__name__}.getExplore")
    if self._exploreCounter == 0:
        # Caller shouldn't be calling if there are no expected
        # answers, but is anyway, so just return
        return {'query': {'sql': 'None'}, 'error': 'Nothing to do',
                'stillToCome': 0}
    job = self._exploreQ.get()
    self._exploreQ.task_done()
    self._exploreCounter -= 1
    reply = job['replies'][0]
    reply['stillToCome'] = self._exploreCounter
    return (reply)

def getKnowledge(self)

Wait for and gather results of prior askKnowledge() calls

Blocks until the result is available. Note that the order in which results are received is not necesarily the order in which askKnowledge() calls were made.
Return parameter formatted the same as with getAttack()

Expand source code

def getKnowledge(self):
    """ Wait for and gather results of prior askKnowledge() calls

        Blocks until the result is available. Note that the order
        in which results are received is not necesarily the order
        in which `askKnowledge()` calls were made. <br/>
        Return parameter formatted the same as with `getAttack()`"""

    if self._vb:
        print(f"Calling {__name__}.getKnowledge")
    if self._knowledgeCounter == 0:
        # Caller shouldn't be calling if there are no expected
        # answers, but is anyway, so just return
        return {'query': {'sql': 'None'}, 'error': 'Nothing to do',
                'stillToCome': 0}
    job = self._knowledgeQ.get()
    self._knowledgeQ.task_done()
    self._knowledgeCounter -= 1
    reply = job['replies'][0]
    reply['stillToCome'] = self._knowledgeCounter
    self._atrs['base']['knowledgeGets'] += 1
    if 'cells' in reply:
        self._atrs['base']['knowledgeCells'] += reply['cells']
    return (reply)

def getOpParameters(self)

Returns a variety of performance measurements.

Useful for debugging.

Expand source code

def getOpParameters(self):
    """ Returns a variety of performance measurements.

        Useful for debugging."""
    self._op['avQueryDuration'] = 0
    if self._op['numQueries'] > 0:
        self._op['avQueryDuration'] = (
                self._op['timeQueries'] / self._op['numQueries'])
    self._op['avCachePutDuration'] = 0
    if self._op['numCachePuts'] > 0:
        self._op['avCachePutDuration'] = (
                self._op['timeCachePuts'] / self._op['numCachePuts'])
    self._op['avCacheGetDuration'] = 0
    if self._op['numCacheGets'] > 0:
        self._op['avCacheGetDuration'] = (
                self._op['timeCacheGets'] / self._op['numCacheGets'])
    return self._op

def getPriorKnowledge(self, dataColumns, method, fraction=None, count=None, selectColumn=None, colRange=[None, None], values=[None])

Returns data from the rawDB according to a specification

This mimics external knowledge that an attacker may have about the data, and influences the 'knowledge' part of the GDA Score.
dataColumns is a list of column names. The data for these columns is returned
method can be 'rows' or 'users'. If 'rows', then rows are selected according to the criteria (fraction, count, selectColumn, colRange, or values). If 'users', then all rows for a set of selected users is returned. The users are selected according to the criteria (fraction or count)
If none of the criteria are set, or if fraction is set to 1.0, then all rows are returned (for the selected column values) One of fraction, count, or selectColumn must be set.
fraction or count are set to obtain a random set of rows or users. If fraction, then an approximate fraction of all rows/users is selected. fraction is a value between 0 and 1.0. If count, then exactly count random rows/users are selected.
selectColumn is set to select rows according to the values of the specified column. selectColumn is a column name. If set, then either a range of values (colRange), or a set of values (values) must be chosen.
colRange is a list with two values: [min,max]. This selects all values between min and max inclusive.
values is a list of one or more values of any type. This selects all values matching those in the list.
The return value is a list in this format:
[(C1,C2...,Cn),(C1,C2...,Cn), ... (C1,C2...,Cn)]
where C1 corresponds to the first column in dataColumns, C2 corresponds to the second column in dataColumns, and so on.

Expand source code

def getPriorKnowledge(self, dataColumns, method,
        fraction=None, count=None, selectColumn=None, colRange=[None,None], values=[None]):
    """ Returns data from the rawDB according to a specification

    This mimics external knowledge that an attacker may have about the data, and
    influences the 'knowledge' part of the GDA Score. <br/>
        `dataColumns` is a list of column names. The data for these columns is returned <br/>
        `method` can be 'rows' or 'users'. If 'rows', then rows are selected
        according to the criteria (`fraction`, `count`, `selectColumn`, `colRange`,
        or `values`).
        If 'users', then all rows for a set of selected users is returned.
        The users are selected according to the criteria (`fraction` or `count`) <br/>
        If none of the criteria are set, or if `fraction` is set to 1.0, then all
        rows are returned (for the selected column values) One of `fraction`, `count`,
        or `selectColumn` must be set. <br/>
        `fraction` or `count` are set to obtain a random set of rows or users. If
        `fraction`, then an approximate fraction of all rows/users is selected.
        `fraction` is a value between 0 and 1.0. If `count`, then exactly `count`
        random rows/users are selected. <br/>
        `selectColumn` is set to select rows according to the values of the specified
        column. `selectColumn` is a column name. If set, then either a range of
        values (`colRange`), or a set of values (`values`) must be chosen. <br/>
        `colRange` is
        a list with two values: `[min,max]`. This selects all values
        between min and max inclusive. <br/>
        `values` is a list
        of one or more values of any type. This selects all values matching those in
        the list. <br/>
        The return value is a list in this format: <br/>
            `[(C1,C2...,Cn),(C1,C2...,Cn), ... (C1,C2...,Cn)]` <br/>
        where C1 corresponds to the first column in `dataColumns`, C2 corresponds to
        the second column in `dataColumns`, and so on.  <br/>
    """
    # Check input parameters
    if not isinstance(dataColumns, list):
        print(f"getPriorKnowledge Error: dataColumns must be a list of one or more column names")
        self.cleanUp(cleanUpCache=False, doExit=True)
    if method not in ['rows','users']:
        print(f"getPriorKnowledge Error: method must be 'rows' or 'users'")
        self.cleanUp(cleanUpCache=False, doExit=True)
    if fraction is None and count is None and selectColumn is None:
        print(f"getPriorKnowledge Error: one of fraction, count, or selectColumn must be set")
        self.cleanUp(cleanUpCache=False, doExit=True)
    if fraction and not isinstance(fraction, float):
        print(f"getPriorKnowledge Error: if set, fraction must be a float")
        self.cleanUp(cleanUpCache=False, doExit=True)
    if (fraction and (count or selectColumn)) or (count and (fraction or selectColumn)):
        print(f"getPriorKnowledge Error: only one of fraction, count, or selectColumn may be set")
        self.cleanUp(cleanUpCache=False, doExit=True)
    if count and not isinstance(count, int):
        print(f"getPriorKnowledge Error: if set, count must be an integer")
        self.cleanUp(cleanUpCache=False, doExit=True)
    if selectColumn:
        if selectColumn not in self._colNames:
            print(f"getPriorKnowledge Error: selectColumn '{selectColumn}' is not a valid column")
            self.cleanUp(cleanUpCache=False, doExit=True)
        if colRange == [None,None] and values == [None]:
            print(f"getPriorKnowledge Error: if selectColumn is set, one of colRange or values must be set")
            self.cleanUp(cleanUpCache=False, doExit=True)
        if not isinstance(colRange, list):
            print(f"getPriorKnowledge Error: colRange must be a list with two values")
            self.cleanUp(cleanUpCache=False, doExit=True)
        if not (isinstance(values, list) or isinstance(values, tuple)) or len(values) == 0:
            print(f"getPriorKnowledge Error: values must be a list or tuple with one or more values")
            self.cleanUp(cleanUpCache=False, doExit=True)
    for col in dataColumns:
        if col not in self._colNames:
            print(f"getPriorKnowledge Error: column '{col}' is not a valid column")
            self.cleanUp(cleanUpCache=False, doExit=True)
    # Basic input checks finished
    # Establish connection to database
    db = getDatabaseInfo(self._p['rawDb'])
    connStr = str(
        f"host={db['host']} port={db['port']} dbname={db['dbname']} user={db['user']} password={db['password']}")
    conn = psycopg2.connect(connStr)
    cur = conn.cursor()
    table = self._p['table']
    uid = self._p['uid']
    # Make the SELECT part of the SQL query
    initSql = 'SELECT '
    for col in dataColumns:
        initSql += str(f"{col}, ")
    initSql = initSql[0:-2]
    if method == 'rows' and fraction:
        sql = initSql + str(f" FROM {table} WHERE random() <= {fraction}")
        ans = self._doQuery(cur,sql)
        self._atrs['base']['knowledgeCells'] += len(dataColumns) * len(ans)
        return(ans)
    if method == 'users' and fraction:
        sql = initSql + str(f" FROM {table} WHERE {uid} IN ")
        sql += str(f"(SELECT {uid} from (SELECT DISTINCT {uid} FROM {table}) t WHERE random() < {fraction})")
        ans = self._doQuery(cur,sql)
        self._atrs['base']['knowledgeCells'] += len(dataColumns) * len(ans)
        return(ans)
    if method == 'rows' and colRange[0] is not None:
        for pair in self._colNamesTypes:
            if selectColumn in pair[0]:
                colType = pair[1]
                break
        if 'text' in colType or 'char' in colType or 'date' in colType or 'time' in colType:
            sql = initSql + str(f" FROM {table} WHERE {selectColumn} >= '{colRange[0]}' and {selectColumn} <= '{colRange[1]}'")
        else:
            sql = initSql + str(f" FROM {table} WHERE {selectColumn} >= {colRange[0]} and {selectColumn} <= {colRange[1]}")
        ans = self._doQuery(cur,sql)
        self._atrs['base']['knowledgeCells'] += len(dataColumns) * len(ans)
        return(ans)
    if method == 'rows' and values[0] is not None:
        sql = initSql + str(f" FROM {table} WHERE {selectColumn} IN (")
        for pair in self._colNamesTypes:
            if selectColumn in pair[0]:
                colType = pair[1]
                break
        for value in values:
            if "text" in colType or "date" in colType or "time" in colType:
                sql += str(f"'{value}', ")
            else:
                sql += str(f"{value}, ")
        sql = sql[0:-2]
        sql += ")"
        ans = self._doQuery(cur,sql)
        self._atrs['base']['knowledgeCells'] += len(dataColumns) * len(ans)
        return(ans)
    if method == 'rows' and count:
        # need to know the total number of rows
        sql = str(f"select count(*) from {table}")
        ans = self._doQuery(cur,sql)
        numRows = ans[0][0]
        # next we get some random set of rows that is certainly more than we need
        frac = (count/numRows)*2
        sql = initSql + str(f" FROM {table} WHERE random() <= {frac}")
        temp = self._doQuery(cur,sql)
        # next we scramble these so that we get a random sampling from the random sampling
        random.shuffle(temp)
        # finally pick the exact count
        ans = temp[0:count]
        self._atrs['base']['knowledgeCells'] += len(dataColumns) * len(ans)
        return(ans)
    if method == 'users' and count:
        # get the full list of distinct UIDs
        sql = str(f"SELECT DISTINCT {uid} from {table}")
        uidList = self._doQuery(cur,sql)
        # next we scramble these so that we can get a random sampling
        random.shuffle(uidList)
        # pick the exact count of UIDs
        uidList = uidList[0:count]
        sql = initSql + str(f" FROM {table} WHERE {uid} IN (")
        for pair in self._colNamesTypes:
            if uid in pair[0]:
                colType = pair[1]
                break
        for uidVal in uidList:
            if "text" in colType or "date" in colType or "time" in colType:
                sql += str(f"'{uidVal[0]}', ")
            else:
                sql += str(f"{uidVal[0]}, ")
        sql = sql[0:-2]
        sql += ")"
        ans = self._doQuery(cur,sql)
        self._atrs['base']['knowledgeCells'] += len(dataColumns) * len(ans)
        return(ans)
    #zzzz
    return None

def getPublicColValues(self, colName, tableName='')

Return list of "publicly known" column values and counts

Column value has index 0, count of distinct UIDs has index 1 Must specify column name.

Expand source code

def getPublicColValues(self, colName, tableName=''):
    """Return list of "publicly known" column values and counts

    Column value has index 0, count of distinct UIDs has index 1
    Must specify column name.
    """
    if len(colName) == 0:
        print(f"Must specify column 'colName'")
        return None

    if len(tableName) == 0:
        # caller didn't supply a table name, so get it from the
        # class init
        tableName = self._p['table']

    # Establish connection to database
    db = getDatabaseInfo(self._p['rawDb'])
    connStr = str(
        f"host={db['host']} port={db['port']} dbname={db['dbname']} user={db['user']} password={db['password']}")
    conn = psycopg2.connect(connStr)
    cur = conn.cursor()
    # First we need to know the total number of distinct users
    sql = str(f"""select count(distinct {self._p['uid']})
                  from {tableName}""")
    try:
        cur.execute(sql)
    except psycopg2.Error as e:
        print(f"Error: getPublicColValues() query: '{e}'")
        self.cleanUp(cleanUpCache=False, doExit=True)
    ans = cur.fetchall()
    numUid = ans[0][0]
    # Query the raw db for values in the column
    sql = str(f"""select {colName}, count(distinct {self._p['uid']})
                  from {tableName}
                  group by 1
                  order by 2 desc
                  limit 200""")
    try:
        cur.execute(sql)
    except psycopg2.Error as e:
        print(f"Error: getPublicColValues() query: '{e}'")
        self.cleanUp(cleanUpCache=False, doExit=True)
    ans = cur.fetchall()
    ret = []
    for row in ans:
        # row[0] is the value, row[1] is the count
        if (((row[1] / numUid) > 0.002) and
                (row[1] >= 50)):
            ret.append((row[0], row[1]))
    conn.close()
    return ret

def getResults(self)

Returns all of the compiled attack results.

This can be input to class gdaScores() and method gdaScores.addResult().

Expand source code

def getResults(self):
    """ Returns all of the compiled attack results.

        This can be input to class `gdaScores()` and method
        `gdaScores.addResult()`."""
    # Add the operational parameters
    self._atrs['operational'] = self.getOpParameters()
    self._cleanPasswords()
    return self._atrs

def getTableCharacteristics(self, tableName='')

Returns the full contents of the table characteristics

Return value is a dict indexed by column name:

{ '<colName>':
    {
        'av_rows_per_vals': 3.93149,
        'av_uids_per_val': 0.468698,
        'column_label': 'continuous',
        'column_name': 'dropoff_latitude',
        'column_type': 'real',
        'max': '898.29382000000000',
        'min': '-0.56333297000000',
        'num_distinct_vals': 24216,
        'num_rows': 95205,
        'num_uids': 11350,
        'std_rows_per_val': 10.8547,
        'std_uids_per_val': 4.09688},
    }
}

Expand source code

def getTableCharacteristics(self, tableName=''):
    """Returns the full contents of the table characteristics

       Return value is a dict indexed by column name: <br/>

           { '<colName>':
               {
                   'av_rows_per_vals': 3.93149,
                   'av_uids_per_val': 0.468698,
                   'column_label': 'continuous',
                   'column_name': 'dropoff_latitude',
                   'column_type': 'real',
                   'max': '898.29382000000000',
                   'min': '-0.56333297000000',
                   'num_distinct_vals': 24216,
                   'num_rows': 95205,
                   'num_uids': 11350,
                   'std_rows_per_val': 10.8547,
                   'std_uids_per_val': 4.09688},
               }
           }

    """
    if len(tableName) == 0:
        # caller didn't supply a table name, so get it from the
        # class init
        tableName = self._p['table']

    # Modify table name to the default for the characteristics table
    tableName += '_char'

    # Establish connection to database
    db = getDatabaseInfo(self._p['rawDb'])
    connStr = str(
        f"host={db['host']} port={db['port']} dbname={db['dbname']} user={db['user']} password={db['password']}")
    conn = psycopg2.connect(connStr)
    cur = conn.cursor()
    # Set up return dict
    ret = {}
    # Query it for column names
    sql = str(f"""select column_name, data_type 
              from information_schema.columns where
              table_name='{tableName}'""")
    try:
        cur.execute(sql)
    except psycopg2.Error as e:
        print(f"Error: getTableCharacteristics() query: '{e}'")
        self.cleanUp(cleanUpCache=False, doExit=True)
    cols = cur.fetchall()
    # Make index for column name (should be 0, but just to be sure)
    for colNameIndex in range(len(cols)):
        if cols[colNameIndex][0] == 'column_name':
            break

    # Query it for table contents
    sql = str(f"SELECT * FROM {tableName}")
    try:
        cur.execute(sql)
    except psycopg2.Error as e:
        print(f"Error: getTableCharacteristics() query: '{e}'")
        self.cleanUp(cleanUpCache=False, doExit=True)
    ans = cur.fetchall()
    for row in ans:
        colName = row[colNameIndex]
        ret[colName] = {}
        for i in range(len(row)):
            ret[colName][cols[i][0]] = row[i]
    conn.close()
    return ret

def getTableNames(self, dbType='rawDb')

Return database table names

dbType is one of 'rawDb' or 'anonDb'
Table names returned as list, unless error then return None

Expand source code

def getTableNames(self, dbType='rawDb'):
    """Return database table names

    dbType is one of 'rawDb' or 'anonDb' <br/>
    Table names returned as list, unless error then return None"""

    # Establish connection to database
    db = getDatabaseInfo(self._p[dbType])
    if db['type'] != 'postgres' and db['type'] != 'aircloak':
        print(f"DB type '{db['type']}' must be 'postgres' or 'aircloak'")
        return None
    connStr = str(
        f"host={db['host']} port={db['port']} dbname={db['dbname']} user={db['user']} password={db['password']}")
    conn = psycopg2.connect(connStr)
    cur = conn.cursor()
    # Query it for column names
    if db['type'] == 'postgres':
        sql = """SELECT tablename
                 FROM pg_catalog.pg_tables
                 WHERE schemaname != 'pg_catalog' AND
                       schemaname != 'information_schema'"""
    elif db['type'] == 'aircloak':
        sql = "show tables"
    try:
        cur.execute(sql)
    except psycopg2.Error as e:
        print(f"Error: getTableNames() query: '{e}'")
        self.cleanUp(cleanUpCache=False, doExit=True)
    ans = cur.fetchall()
    ret = []
    for row in ans:
        ret.append(row[0])
    conn.close()
    return ret

def getUidColName(self)

Returns the name of the UID column

Expand source code

def getUidColName(self):
    """ Returns the name of the UID column"""
    return self._p['uid']

def isClaimed(self, spec)

Check if a claim was already fully or partially made.

The spec is formatted identical to the spec in gdaAttack.askClaim().

Expand source code

def isClaimed(self, spec):
    """Check if a claim was already fully or partially made.

    The `spec` is formatted identical to the `spec` in `gdaAttack.askClaim`."""
    return self._dupCheck.is_claimed(spec, verbose=self._vb)

def putCacheWrapper(self, conn, cur, query, reply)

Expand source code

def putCacheWrapper(self, conn, cur, query, reply):
    self._putCache(conn, cur, query, reply)

def removeLocalCacheDBWrapper(self)

Expand source code

def removeLocalCacheDBWrapper(self):
    return self._removeLocalCacheDB()

def setVerbose(self)

Sets Verbose to True

Expand source code

def setVerbose(self):
    """Sets Verbose to True"""
    self._vb = True

def unsetVerbose(self)

Sets Verbose to False

Expand source code

def unsetVerbose(self):
    """Sets Verbose to False"""
    self._vb = False