diff -rc db-4.5.20-orig/env/env_register.c db-4.5.20/env/env_register.c *** db-4.5.20-orig/env/env_register.c 2006-09-09 16:29:04.000000000 +0200 --- db-4.5.20/env/env_register.c 2007-05-16 21:13:27.000000000 +0200 *************** *** 255,260 **** --- 255,262 ---- buf[nr - 1] = '\0'; } + //sleep(3); + pos = (off_t)lcnt * PID_LEN; if (REGISTRY_LOCK(dbenv, pos, 1) == 0) { if ((ret = REGISTRY_UNLOCK(dbenv, pos)) != 0) *************** *** 361,366 **** --- 363,392 ---- if (recovery_failed) goto err; + //sleep(5); + + /* + * Acquire an exclusive lock to prevent a race like this: + * + * 1) Process X is about to exit and process Y is just + * starting. + * 2) Process Y reads X's slot. + * 3) Process X clears its slot. + * 4) Process Y sees that X's slot isn't cleared yet (since it + * just read the old value). + * 5) Process X closes the registry, releases the lock on its + * slot. + * 6) Process Y tries to acquire X's slot and succeeds, so it + * concludes that X died and recovery is needed. + * + * A more efficient solution to this problem would be to let + * __envreg_add acquire the lock on a slot first, and *then* + * read the slot (instead of the other way around). Then we + * wouldn't need the exclusive lock here. + */ + if ((ret = REGISTRY_EXCL_LOCK(dbenv, 0)) != 0) + goto err; + /* * Why isn't an exclusive lock necessary to discard a DB_ENV handle? *