#!/usr/bin/env python
#
# bets.py -- test cases nailing down obscure BerkeleyDB behaviour
# Written by Luke Gorrie. $Revision: 1.1 $
#
# These test cases were written to investigate the behaviour of
# BerkeleyDB databases containing incomplete secondary index tables. I
# had observed some weird behaviour in the field and I wrote these
# test cases to confirm my intuition of what was happening. Now that's
# done with, so they exist as documentation.
#
# Suppose:
#
#   You have a primary database.
#   You have a secondary ('associated') database.
#   The secondary database is missing records for some keys.
#
# In this situation:
#
#   You cannot write a new value that modifies a secondary key if that
#   secondary key is missing from the index table.
#
#   You can write a new value that preserves the old secondary keys.
#   Doing this has the side-effect of adding the missing record to
#   the secondary index table. That is, you can 'heal' the secondary
#   index by writing the original value again.
#
# This suggests to me the following algorithm for putting records into
# a database that may have inconsistent indexing:
#
#   try:
#       db.put(key, value)
#   except DBSecondaryBadError:
#       db.put(key, db.get(key))  # rewrite existing value to correct indexing
#       db.put(key, value)        # try again
#
# "Problem solved."
#
# NB: Reading __db_c_put in db/db_cam.c in the BDB 4.1.25 sources
#     makes the strange behaviour explicable.
#
#     Their algorithm is approximately:
#
#     1. Write new secondary index records for the new value.
#     2. For each secondary index that's different than in the original
#        value, delete the old record.
#     3. Write the primary record.
#
#     so #1 has the healing effect and #2 goes wrong if the record it
#     wants to delete is missing, and that prevents #3 from updating the
#     DB. (The reason for the funny ordering is for locking discipline.)
#
#     ...... or so it seems to me more or less!
import os
import unittest

from bsddb import db


class BetsTest(unittest.TestCase):
    """Tests pinning down how BerkeleyDB behaves when a secondary index
    is missing records that are present in the primary database.

    The checks use unittest assertion methods rather than bare ``assert``
    statements so that they still run under ``python -O``.
    """

    def setUp(self):
        # Every test starts without a leftover database file.
        os.system('rm -rf /tmp/betsdb')

    def testPopulate(self):
        """no secondary index"""
        bdb = db.DB()
        bdb.open('/tmp/betsdb', None, db.DB_BTREE, db.DB_CREATE)
        keys = ['a', 'b', 'c', 'd', 'e']
        for key in keys:
            bdb.put(key, key)
        for key in keys:
            self.assertEqual(key, bdb.get(key))

    def testSecKey(self):
        """secondary index (not corrupted)"""
        #
        # Create & associate the databases before writing any records.
        #
        pridb = openDB('pri')
        secdb = openDB('sec')
        pridb.associate(secdb, extractor(1))
        #
        # Everything works as you'd expect.
        #
        for value in ['ab', 'cd', 'de']:
            key, seckey = value[0], value[1]
            pridb.put(key, value)
            self.assertEqual(pridb.get(key), value)
            self.assertEqual(secdb.get(seckey), value)

    def testMissingSecKey(self):
        """single corrupted index"""
        #
        # Create both databases but slip a record in before associating them.
        #
        pridb = openDB('pri')
        secdb = openDB('sec')
        pridb.put('a', 'ab')
        pridb.associate(secdb, extractor(1))
        self.assertEqual(None, secdb.get('b'))
        #
        # Now secdb is missing a record so we can check the "strange"
        # behaviour.
        #
        # put() can't change the secondary key.
        #
        self.assertRaises(db.DBNotFoundError,  # DBSecondaryBadError in BDB4.4!
                          pridb.put, 'a', 'ac')
        #
        # but it's ok to put() the old record for healing and then repeat
        #
        pridb.put('a', 'ab')
        pridb.put('a', 'ac')
        #
        # sanity checks
        #
        self.assertEqual('ac', secdb.get('c'))
        self.assertEqual(None, secdb.get('b'))

    def testMissingSecKeyList(self):
        """partially corrupted indexes"""
        indexes = [0, 1, 2, 3, 4]
        pridb = openDB('pri')
        secdbs = [openDB('sec.%d' % ix) for ix in indexes]
        #
        # Add the odd-numbered indexes.
        #
        for index in indexes:
            if index % 2 == 1:
                pridb.associate(secdbs[index], extractor(index))
        #
        # Add a record that will be partially indexed on odd fields.
        #
        pridb.put('a', 'abcde')
        #
        # Now index the even fields.
        #
        for index in indexes:
            if index % 2 == 0:
                pridb.associate(secdbs[index], extractor(index))
        #
        # Because the even fields don't exist in the index table we're
        # not able to change them.
        #
        self.assertRaises(db.DBNotFoundError, pridb.put, 'a', 'ABCDE')
        #
        # Check that everything is still as expected.
        #
        for index in indexes:
            secdb = secdbs[index]
            key = 'abcde'[index]
            if index % 2 == 1:
                self.assertEqual(secdb.get(key), 'abcde')
            else:
                # NOTE(review): this branch repeats the failing put() instead
                # of inspecting secdb; presumably a direct check that the
                # even-numbered index lacks `key` was intended -- confirm
                # against a live BerkeleyDB before changing it.
                self.assertRaises(db.DBNotFoundError,
                                  pridb.put, 'a', 'ABCDE')
        #
        # Write a record that changes all the odd fields (possible
        # because they are in the indexes) and doesn't change the even
        # fields (which will correct them as a side-effect)
        #
        pridb.put('a', 'aBcDeF')
        for index in indexes:
            secdb = secdbs[index]
            key = 'aBcDeF'[index]
            self.assertEqual(secdb.get(key), 'aBcDeF')
        #
        # Now the record is fully indexed and we can change all fields.
        #
        pridb.put('a', '12345')


def extractor(index):
    """Return a secondary-key callback that yields value[index].

    A factory function is used so that each callback captures its own
    `index`; a lambda written directly inside a loop would look the loop
    variable up late and every callback would see its final value.
    """
    return lambda key, value: value[index]


def suite():
    """Return a suite holding every 'test*' method of BetsTest."""
    return unittest.TestLoader().loadTestsFromTestCase(BetsTest)


def openDB(dbname=None, filename='/tmp/betsdb'):
    """Open the B-tree database `dbname` inside `filename`, creating it
    if it does not exist, and return the handle.
    """
    bdb = db.DB()
    bdb.open(filename, dbname, db.DB_BTREE, db.DB_CREATE)
    return bdb


# Command-line support.
if __name__ == '__main__':
    runner = unittest.TextTestRunner(verbosity=2)
    runner.run(suite())