Index: trunk/src/cutest_sh_unix.c
===================================================================
--- trunk/src/cutest_sh_unix.c	(revision 170)
+++ trunk/src/cutest_sh_unix.c	(revision 171)
@@ -7,4 +7,145 @@
 #include "sh_unix.h"
 
+int malloc_count = 0;
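+/* The counter above is assumed to be maintained by dnmalloc itself
+ * (incremented on every allocation, decremented on every free) when
+ * the build does not define USE_SYSTEM_MALLOC; the count-based
+ * assertions below rely on exactly that pairing. */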
+
+void Test_dnmalloc (CuTest *tc) {
+
+  const int nalloc = 64; /* original dnmalloc 1.0-beta5 fails for >= 45 */
+  int j, i;
+  int sum;
+  int i_malloc =  malloc_count;
+
+  char * buf;
+  char * area[256];
+
+  /* test reuse of last freed chunk */
+  buf = malloc(1024);
+  CuAssertPtrNotNull(tc, buf);
+  free(buf);
+  area[0] = malloc(1024);
+  CuAssertTrue(tc, buf == area[0]);
+  free(area[0]);
+
+  /* test realloc */
+  buf = malloc(16);
+  CuAssertPtrNotNull(tc, buf);
+  strcpy(buf, "testing realloc");
+  buf = realloc(buf, 32);
+  CuAssertPtrNotNull(tc, buf);
+  strcat(buf, "testing realloc");
+  CuAssertStrEquals(tc, "testing realloctesting realloc", buf);
+  free(buf);
+
+  i_malloc = malloc_count;
+
+  for (j = 0; j < 64; ++j)
+    {
+      buf = malloc((j+1) * 1024);
+      CuAssertPtrNotNull(tc, buf);
+#ifndef USE_SYSTEM_MALLOC
+      CuAssertIntEquals (tc, malloc_count, (i_malloc + 1));
+#endif
+      free(buf);
+#ifndef USE_SYSTEM_MALLOC
+      CuAssertIntEquals (tc, malloc_count, i_malloc);
+#endif
+    }
+
+  /* test realloc */
+  buf = malloc(16);
+  CuAssertPtrNotNull(tc, buf);
+  strcpy(buf, "testing realloc");
+  buf = realloc(buf, 32);
+  CuAssertPtrNotNull(tc, buf);
+  strcat(buf, "testing realloc");
+  CuAssertStrEquals(tc, "testing realloctesting realloc", buf);
+  free(buf);
+
+  i_malloc = malloc_count;
+
+  for (j = 0; j < 64; ++j)
+    {
+      buf = calloc(1, (j+1) * 1024);
+      CuAssertPtrNotNull(tc, buf);
+#ifndef USE_SYSTEM_MALLOC
+      CuAssertIntEquals (tc, malloc_count, (i_malloc + 1));
+#endif
+      sum = 0;
+      for (i = 0; i < ((j+1) * 1024); ++i)
+	sum += buf[i];
+      CuAssertIntEquals (tc, 0, sum);
+      free(buf);
+#ifndef USE_SYSTEM_MALLOC
+      CuAssertIntEquals (tc, malloc_count, i_malloc);
+#endif
+    }
+
+  /* test realloc */
+  buf = malloc(16);
+  CuAssertPtrNotNull(tc, buf);
+  strcpy(buf, "testing realloc");
+  buf = realloc(buf, 32);
+  CuAssertPtrNotNull(tc, buf);
+  strcat(buf, "testing realloc");
+  CuAssertStrEquals(tc, "testing realloctesting realloc", buf);
+  free(buf);
+
+  for (j = 0; j < nalloc; ++j)
+    {
+      area[j] = malloc((j+1) * 1024);
+      CuAssertPtrNotNull(tc, area[j]);
+#ifndef USE_SYSTEM_MALLOC
+      /* CuAssertIntEquals (tc, malloc_count, (i_malloc + (j+1))); */
+#endif
+      memset(area[j], (unsigned char) ('a'+1), (j+1) * 1024);
+    }
+
+  i_malloc =  malloc_count;
+
+  for (j = 0; j < nalloc; ++j)
+    {
+      sum = 0;
+      for (i = 0; i < ((j+1) * 1024); ++i)
+	sum +=  area[j][i];
+      CuAssertIntEquals (tc, sum, ((j+1) * 1024 * ((unsigned char) ('a'+1))));
+      free(area[j]);
+#ifndef USE_SYSTEM_MALLOC
+      CuAssertIntEquals (tc, malloc_count, i_malloc - (j+1));
+#endif
+    }
+
+  /* test realloc */
+  buf = malloc(16);
+  CuAssertPtrNotNull(tc, buf);
+  strcpy(buf, "testing realloc");
+  buf = realloc(buf, 32);
+  CuAssertPtrNotNull(tc, buf);
+  strcat(buf, "testing realloc");
+  CuAssertStrEquals(tc, "testing realloctesting realloc", buf);
+  free(buf);
+
+  for (j = 0; j < 32; ++j)
+    {
+      i_malloc =  malloc_count;
+      buf = malloc((j+1) * 1024 * 1024);
+      CuAssertPtrNotNull(tc, buf);
+      for (i = 0; i < 32; ++i)
+	{
+	  area[i] = malloc((i+1) * 1024);
+	  CuAssertPtrNotNull(tc, area[i]);
+	}
+      free(buf);
+      for (i = 0; i < 32; ++i)
+	{
+	  free(area[i]);
+	}
+#ifndef USE_SYSTEM_MALLOC
+      CuAssertIntEquals (tc, malloc_count, i_malloc);
+#endif
+    }
+
+  /* test realloc */
+  buf = malloc(16);
+  CuAssertPtrNotNull(tc, buf);
+  strcpy(buf, "testing realloc");
+  buf = realloc(buf, 32);
+  CuAssertPtrNotNull(tc, buf);
+  strcat(buf, "testing realloc");
+  CuAssertStrEquals(tc, "testing realloctesting realloc", buf);
+  free(buf);
+}
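+
+/* Note: to be executed, Test_dnmalloc must be registered with the
+ * CuTest suite for this file, e.g. (illustrative; the actual suite
+ * setup lives elsewhere):
+ *
+ *   SUITE_ADD_TEST(suite, Test_dnmalloc);
+ */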
+
 void Test_sh_unix_lookup_page (CuTest *tc) {
 
Index: trunk/src/dnmalloc.c
===================================================================
--- trunk/src/dnmalloc.c	(revision 171)
+++ trunk/src/dnmalloc.c	(revision 171)
@@ -0,0 +1,5481 @@
+/* DistriNet malloc (dnmalloc): a more secure memory allocator. 
+   Copyright (C) 2005, Yves Younan, Wouter Joosen, Frank Piessens and Rainer Wichmann
+   The authors can be contacted by:
+      Email: dnmalloc@fort-knox.org
+      Address:
+      	     Yves Younan
+      	     Celestijnenlaan 200A
+      	     B-3001 Heverlee
+      	     Belgium
+   
+   This library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   This library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with this library; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+   
+*/
+
+/* Current version: dnmalloc 1.0  */
+/* Includes arc4random from OpenBSD, which is under the BSD license     */
+
+/* Versions:
+   0.1-0.5:
+   Proof of concept implementation by Hans Van den Eynden and Yves Younan
+   0.6-0.7:
+   Bug fixes by Yves Younan
+   0.8-1.0.beta4:
+   Reimplementation from scratch by Yves Younan
+   1.0.beta4:
+   Public release
+   1.0.beta5:
+   Prev_chunkinfo speeded up, was really slow because of the way we did lookups
+   A freechunkinfo region is now freed when it is completely empty and 
+   not the current one
+
+   1.0 (Rainer Wichmann [support at la dash samhna dot org]):
+   ---------------------
+
+   Compiler warnings fixed
+   Define REALLOC_ZERO_BYTES_FREES because it's what GNU libc does
+       (and what the standard says)
+   Removed unused code
+   Fix       assert(aligned_OK(chunk(newp)));
+         ->  assert(aligned_OK(chunk(oldp)));
+   Fix statistics in sYSMALLOc
+   Fix overwrite of av->top in sYSMALLOc
+   Provide own assert(), glibc assert() doesn't work (calls malloc)
+   Fix bug in mEMALIGn(), put remainder in hashtable before calling fREe
+   Remove cfree, independent_cmalloc, independent_comalloc (untested
+       public functions not covered by any standard)
+   Provide posix_memalign (that one is in the standard)
+   Move the malloc_state struct to mmapped memory protected by guard pages 
+   Add arc4random function to initialize random canary on startup
+   Implement random canary at end of (re|m)alloced/memaligned buffer,
+       check at free/realloc
+   Remove code conditional on !HAVE_MMAP, since mmap is required anyway.
+   Use standard HAVE_foo macros (as generated by autoconf) instead of LACKS_foo
+
+   Profiling: Reorder branches in hashtable_add, next_chunkinfo, 
+                  prev_chunkinfo, hashtable_insert, mALLOc, fREe, request2size,
+	          checked_request2size (gcc predicts if{} branch to be taken).
+	      Use UNLIKELY macro (gcc __builtin_expect()) where branch
+                  reordering would make the code awkward.
+
+   Portability: Hashtable always covers full 32bit address space to
+                avoid assumptions about memory layout.
+   Portability: Try hard to enforce mapping of mmapped memory into
+                32bit address space, even on 64bit systems.
+   Portability: Provide a dnmalloc_pthread_init() function, since
+                pthread locking on HP-UX only works if initialized
+		after the application has entered main().
+   Portability: On *BSD, pthread_mutex_lock is unusable since it
+                calls malloc, use spinlocks instead.
+   Portability: Dynamically detect whether the heap is within
+                32bit address range (e.g. on Linux x86_64, it isn't).
+		Don't use sbrk() if the heap is mapped to an address 
+		outside the 32bit range, since this doesn't work with 
+		the hashtable. New macro morecore32bit.
+                
+   Success on: HP-UX 11.11/pthread, Linux/pthread (32/64 bit),
+               FreeBSD/pthread, and Solaris 10 i386/pthread.
+   Fail    on: OpenBSD/pthread (in  _thread_machdep_save_float_state),
+               might be related to OpenBSD pthread internals (??).
+	       Non-threaded version (#undef USE_MALLOC_LOCK) 
+	       works on OpenBSD.
+   
+   There may be some bugs left in this version. Please use with caution.
+*/
+
+
+
+/* Please read the following papers for documentation: 
+
+   Yves Younan, Wouter Joosen, and Frank Piessens, A Methodology for Designing
+   Countermeasures against Current and Future Code Injection Attacks,
+   Proceedings of the Third IEEE International Information Assurance
+   Workshop 2005 (IWIA2005), College Park, Maryland, U.S.A., March 2005,
+   IEEE, IEEE Press.
+   http://www.fort-knox.org/younany_countermeasures.pdf
+   
+   Yves Younan, Wouter Joosen and Frank Piessens and Hans Van den
+   Eynden. Security of Memory Allocators for C and C++. Technical Report
+   CW419, Departement Computerwetenschappen, Katholieke Universiteit
+   Leuven, July 2005. http://www.fort-knox.org/CW419.pdf
+ 
+ */
+
+/* Compile:
+   gcc -fPIC -rdynamic -c -Wall dnmalloc-portable.c
+   "Link": 
+   Dynamic:
+   gcc -shared -Wl,-soname,libdnmalloc.so.0 -o libdnmalloc.so.0.0 dnmalloc-portable.o -lc
+   Static:
+   ar -rv libdnmalloc.a dnmalloc-portable.o
+   
+*/
+
+/* 
+   dnmalloc is based on dlmalloc 2.7.2 (by Doug Lea (dl@cs.oswego.edu))
+   dlmalloc was released as public domain and contained the following license:
+   
+   "This is a version (aka dlmalloc) of malloc/free/realloc written by
+   Doug Lea and released to the public domain.  Use, modify, and
+   redistribute this code without permission or acknowledgement in any
+   way you wish.  Send questions, comments, complaints, performance
+   data, etc to dl@cs.oswego.edu
+   
+   * VERSION 2.7.2 Sat Aug 17 09:07:30 2002  Doug Lea  (dl at gee)
+   
+   Note: There may be an updated version of this malloc obtainable at
+   ftp://gee.cs.oswego.edu/pub/misc/malloc.c
+   Check before installing!"
+   
+*/
+
+/* The following preprocessor macros are tested, 
+ *   and hence should have #define directives:
+ *
+ *   HAVE_CONFIG_H    Define to #include "config.h" (autoconf-generated)
+ *
+ *   HAVE_UNISTD_H    Define to #include <unistd.h>
+ *
+ *   HAVE_SYS_UIO_H   Define to #include <sys/uio.h> (for writev)
+ *   HAVE_WRITEV      Define if the 'writev' function is available
+ *
+ *   HAVE_SYS_PARAM_H Define to #include <sys/param.h> (for pagesize)
+ *
+ *   HAVE_MALLOC_H    Define to #include <malloc.h> (for struct mallinfo)
+ *
+ *   HAVE_FCNTL_H     Define to #include <fcntl.h>
+ *
+ *   HAVE_SYS_MMAN_H  Define to #include <sys/mman.h>
+ *   HAVE_MMAP        Define if the 'mmap' function is available.
+ *
+ *   HAVE_SCHED_H     Define to #include <sched.h>
+ *   HAVE_SCHED_YIELD Define if the 'sched_yield' function is available
+ */
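+
+/* For illustration only: on a typical Linux build, the
+ * autoconf-generated config.h would be expected to contain, among
+ * others:
+ *
+ *   #define HAVE_UNISTD_H 1
+ *   #define HAVE_SYS_UIO_H 1
+ *   #define HAVE_WRITEV 1
+ *   #define HAVE_SYS_MMAN_H 1
+ *   #define HAVE_MMAP 1
+ *   #define HAVE_SCHED_H 1
+ *   #define HAVE_SCHED_YIELD 1
+ */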
+
+
+/*
+  __STD_C should be nonzero if using ANSI-standard C compiler, a C++
+  compiler, or a C compiler sufficiently close to ANSI to get away
+  with it.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#if defined (__GNUC__) && __GNUC__ > 2
+# define LIKELY(expression) (__builtin_expect(!!(expression), 1))
+# define UNLIKELY(expression) (__builtin_expect(!!(expression), 0))
+# define __attribute_malloc__ __attribute__ ((__malloc__))
+#else
+# define LIKELY(x)       (x)
+# define UNLIKELY(x)     (x)
+# define __attribute_malloc__ /* Ignore */
+#endif
+
+/*
+  Define HAVE_MREMAP to make realloc() use mremap() to re-allocate
+  large blocks.  This is currently only possible on Linux with
+  kernel versions newer than 1.3.77.
+*/
+
+#ifndef HAVE_MREMAP
+#ifdef linux
+#define HAVE_MREMAP 1
+#define _GNU_SOURCE
+#else
+#define HAVE_MREMAP 0
+#endif
+#endif /* HAVE_MREMAP */
+
+
+
+#ifndef __STD_C
+#if defined(__STDC__) || defined(__cplusplus)
+#define __STD_C     1
+#else
+#define __STD_C     0
+#endif 
+#endif /*__STD_C*/
+
+
+/*
+  Void_t* is the pointer type that malloc should say it returns
+*/
+
+#ifndef Void_t
+#if (__STD_C || defined(WIN32))
+#define Void_t      void
+#else
+#define Void_t      char
+#endif
+#endif /*Void_t*/
+
+#if __STD_C
+#include <stddef.h>   /* for size_t */
+#else
+#include <sys/types.h>
+#endif
+
+#if !defined(USE_SYSTEM_MALLOC)
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* define HAVE_UNISTD_H if your system has a <unistd.h>. */
+
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+#ifdef HAVE_SYS_UIO_H
+#include <sys/uio.h>
+#endif
+
+#include <stdio.h>    /* needed for malloc_stats */
+#include <errno.h>    /* needed for optional MALLOC_FAILURE_ACTION */
+
+#include <string.h>
+#include <stdlib.h>
+
+#include <sys/resource.h>
+
+extern int errno;
+
+  /* 0: lazy, 
+   * 1: medium (assertions compiled in), 
+   * 2: high (guard pages at end of hash table and ciregions) 
+   * 3: paranoid (guards at end of each allocated chunk, check at free) 
+   */
+#ifndef PARANOIA
+#define PARANOIA 9
+#endif
+
+  /* Using assert() with multithreading will cause the code 
+   * to deadlock since glibc __assert_fail will call malloc().
+   * We need our very own assert().
+   */
+typedef void assert_handler_tp(const char * error, const char *file, int line);
+
+#if  PARANOIA > 0
+
+#ifdef NDEBUG
+#undef NDEBUG
+#endif
+
+#if !defined(SAMHAIN) 
+static void default_assert_handler(const char *error, 
+				   const char *file, int line)
+{
+#ifdef HAVE_WRITEV
+  struct iovec iov[5];
+  char * i1 = "assertion failed (";
+  char * i3 = "): ";
+  char * i5 = "\n";
+
+  iov[0].iov_base = i1;               iov[0].iov_len = strlen(i1); 
+  iov[1].iov_base = (char*) file;     iov[1].iov_len = strlen(file); 
+  iov[2].iov_base = i3;               iov[2].iov_len = strlen(i3); 
+  iov[3].iov_base = (char*) error;    iov[3].iov_len = strlen(error); 
+  iov[4].iov_base = i5;               iov[4].iov_len = strlen(i5); 
+  writev(STDERR_FILENO, iov, 5);
+#else
+  fputs("assertion failed (", stderr);
+  fputs(file, stderr);
+  fputs("): ", stderr);
+  fputs(error, stderr);
+  fputc('\n', stderr);
+#endif
+  abort();
+}
+static assert_handler_tp *assert_handler = default_assert_handler;
+#else
+extern void safe_fatal(const char *error, 
+		       const char *file, int line);
+static assert_handler_tp *assert_handler = safe_fatal;
+#endif
+
+#define assert(x)                               \
+  do {		                                \
+    if (UNLIKELY(!(x))) {			\
+      assert_handler(#x, __FILE__, __LINE__);	\
+    }                                           \
+  } while (0)
+
+#else
+
+static assert_handler_tp *assert_handler = NULL;
+#define NDEBUG
+#define assert(x) ((void)0)
+
+#endif
+
+assert_handler_tp *dnmalloc_set_handler(assert_handler_tp *new_handler)
+{
+  assert_handler_tp *old = assert_handler;
+  assert_handler = new_handler;
+  return old;
+}
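+
+/* Illustrative sketch of installing a custom handler; my_handler and
+ * its message format are hypothetical, not part of dnmalloc. The one
+ * hard rule, per the deadlock note above, is that the handler must
+ * not allocate.
+ *
+ *   static void my_handler(const char *error, const char *file, int line)
+ *   {
+ *     fprintf(stderr, "%s:%d: assertion %s failed\n", file, line, error);
+ *     abort();
+ *   }
+ *
+ *   ... at startup:  (void) dnmalloc_set_handler(my_handler);
+ */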
+
+
+#include <stdarg.h>
+
+  /* define for debugging */
+  /* #define DNMALLOC_DEBUG */ 
+
+  /* Do some extra checks? */
+  /* #define DNMALLOC_CHECKS */
+
+  /*
+    The unsigned integer type used for comparing any two chunk sizes.
+    This should be at least as wide as size_t, but should not be signed.
+  */
+
+#ifndef CHUNK_SIZE_T
+#define CHUNK_SIZE_T unsigned long
+#endif
+
+/* 
+  The unsigned integer type used to hold addresses when they are
+  manipulated as integers. intptr_t would suffice, except that it
+  is not defined on all systems.
+*/
+#ifndef PTR_UINT
+#define PTR_UINT unsigned long
+#endif
+
+
+/*
+  INTERNAL_SIZE_T is the word-size used for internal bookkeeping
+  of chunk sizes.
+
+  The default version is the same as size_t.
+
+  While not strictly necessary, it is best to define this as an
+  unsigned type, even if size_t is a signed type. This may avoid some
+  artificial size limitations on some systems.
+
+  On a 64-bit machine, you may be able to reduce malloc overhead by
+  defining INTERNAL_SIZE_T to be a 32 bit `unsigned int' at the
+  expense of not being able to handle more than 2^32 bytes of malloced
+  space. If this limitation is acceptable, you are encouraged to set
+  this unless you are on a platform requiring 16-byte alignments. In
+  this case the alignment requirements turn out to negate any
+  potential advantages of decreasing size_t word size.
+
+  Implementors: Beware of the possible combinations of:
+     - INTERNAL_SIZE_T might be signed or unsigned, might be 32 or 64 bits,
+       and might be the same width as int or as long
+     - size_t might have different width and signedness than INTERNAL_SIZE_T
+     - int and long might be 32 or 64 bits, and might be the same width
+  To deal with this, most comparisons and difference computations
+  among INTERNAL_SIZE_Ts should cast them to CHUNK_SIZE_T, being
+  aware of the fact that casting an unsigned int to a wider long does
+  not sign-extend. (This also makes checking for negative numbers
+  awkward.) Some of these casts result in harmless compiler warnings
+  on some systems.
+*/
+
+#ifndef INTERNAL_SIZE_T
+#define INTERNAL_SIZE_T size_t
+#endif
+
+/* The corresponding word size */
+#define SIZE_SZ                (sizeof(INTERNAL_SIZE_T))
+
+
+
+/*
+  MALLOC_ALIGNMENT is the minimum alignment for malloc'ed chunks.
+  It must be a power of two at least 2 * SIZE_SZ, even on machines
+  for which smaller alignments would suffice. It may be defined as
+  larger than this though. Note however that code and data structures
+  are optimized for the case of 8-byte alignment.
+*/
+
+
+#ifndef MALLOC_ALIGNMENT
+#define MALLOC_ALIGNMENT       (2 * SIZE_SZ)
+#endif
+
+/* The corresponding bit mask value */
+#define MALLOC_ALIGN_MASK      (MALLOC_ALIGNMENT - 1)
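+
+/* E.g. a pointer or chunk address p is suitably aligned iff
+ * (((PTR_UINT) p) & MALLOC_ALIGN_MASK) == 0; the aligned_OK-style
+ * checks later in this file are assumed to follow this pattern. */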
+
+
+
+/*
+  REALLOC_ZERO_BYTES_FREES should be set if a call to
+  realloc with zero bytes should be the same as a call to free.
+  Some people think it should. Otherwise, since this malloc
+  returns a unique pointer for malloc(0), so does realloc(p, 0).
+*/
+
+#define REALLOC_ZERO_BYTES_FREES
+
+/*
+  TRIM_FASTBINS controls whether free() of a very small chunk can
+  immediately lead to trimming. Setting to true (1) can reduce memory
+  footprint, but will almost always slow down programs that use a lot
+  of small chunks.
+
+  Define this only if you are willing to give up some speed to more
+  aggressively reduce system-level memory footprint when releasing
+  memory in programs that use many small chunks.  You can get
+  essentially the same effect by setting MXFAST to 0, but this can
+  lead to even greater slowdowns in programs using many small chunks.
+  TRIM_FASTBINS is an in-between compile-time option, that disables
+  only those chunks bordering topmost memory from being placed in
+  fastbins.
+*/
+
+#ifndef TRIM_FASTBINS
+#define TRIM_FASTBINS  0
+#endif
+
+
+/*
+  USE_DL_PREFIX will prefix all public routines with the string 'dl'.
+  This is necessary when you only want to use this malloc in one part 
+  of a program, using your regular system malloc elsewhere.
+*/
+
+/* #define USE_DL_PREFIX */
+
+
+/*
+  USE_MALLOC_LOCK causes wrapper functions to surround each
+  callable routine with pthread mutex lock/unlock.
+
+  USE_MALLOC_LOCK forces USE_PUBLIC_MALLOC_WRAPPERS to be defined
+*/
+
+/* #define USE_MALLOC_LOCK */
+
+
+/*
+  If USE_PUBLIC_MALLOC_WRAPPERS is defined, every public routine is
+  actually a wrapper function that first calls MALLOC_PREACTION, then
+  calls the internal routine, and follows it with
+  MALLOC_POSTACTION. This is needed for locking, but you can also use
+  this, without USE_MALLOC_LOCK, for purposes of interception,
+  instrumentation, etc. It is a sad fact that using wrappers often
+  noticeably degrades performance of malloc-intensive programs.
+*/
+
+
+#ifdef USE_MALLOC_LOCK
+#define USE_PUBLIC_MALLOC_WRAPPERS
+#else
+/* #define USE_PUBLIC_MALLOC_WRAPPERS */
+#endif
+
+
+/* 
+   Two-phase name translation.
+   All of the actual routines are given mangled names.
+   When wrappers are used, they become the public callable versions.
+   When DL_PREFIX is used, the callable names are prefixed.
+*/
+
+#ifndef USE_PUBLIC_MALLOC_WRAPPERS
+#define cALLOc      public_cALLOc
+#define fREe        public_fREe
+#define mALLOc      public_mALLOc
+#define mEMALIGn    public_mEMALIGn
+#define posix_mEMALIGn    public_posix_mEMALIGn
+#define rEALLOc     public_rEALLOc
+#define vALLOc      public_vALLOc
+#define pVALLOc     public_pVALLOc
+#define mALLINFo    public_mALLINFo
+#define mALLOPt     public_mALLOPt
+#define mTRIm       public_mTRIm
+#define mSTATs      public_mSTATs
+#define mUSABLe     public_mUSABLe
+#endif
+
+#ifdef USE_DL_PREFIX
+#define public_cALLOc    dlcalloc
+#define public_fREe      dlfree
+#define public_mALLOc    dlmalloc
+#define public_mEMALIGn  dlmemalign
+#define public_posix_mEMALIGn  dlposix_memalign
+#define public_rEALLOc   dlrealloc
+#define public_vALLOc    dlvalloc
+#define public_pVALLOc   dlpvalloc
+#define public_mALLINFo  dlmallinfo
+#define public_mALLOPt   dlmallopt
+#define public_mTRIm     dlmalloc_trim
+#define public_mSTATs    dlmalloc_stats
+#define public_mUSABLe   dlmalloc_usable_size
+#else /* USE_DL_PREFIX */
+#define public_cALLOc    calloc
+#define public_fREe      free
+#define public_mALLOc    malloc
+#define public_mEMALIGn  memalign
+#define public_posix_mEMALIGn  posix_memalign
+#define public_rEALLOc   realloc
+#define public_vALLOc    valloc
+#define public_pVALLOc   pvalloc
+#define public_mALLINFo  mallinfo
+#define public_mALLOPt   mallopt
+#define public_mTRIm     malloc_trim
+#define public_mSTATs    malloc_stats
+#define public_mUSABLe   malloc_usable_size
+#endif /* USE_DL_PREFIX */
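+
+/* Example of the two-phase translation above: with
+ * USE_PUBLIC_MALLOC_WRAPPERS defined and USE_DL_PREFIX undefined, a
+ * call to malloc(n) binds to the wrapper public_mALLOc(n), which runs
+ * MALLOC_PREACTION, calls the internal mALLOc(n), and finishes with
+ * MALLOC_POSTACTION. With USE_DL_PREFIX it binds to dlmalloc(n)
+ * instead. */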
+
+
+/*
+  HAVE_MEMCPY should be defined if you are not otherwise using
+  ANSI STD C, but still have memcpy and memset in your C library
+  and want to use them in calloc and realloc. Otherwise simple
+  macro versions are defined below.
+
+  USE_MEMCPY should be defined as 1 if you actually want to
+  have memset and memcpy called. People report that the macro
+  versions are faster than libc versions on some systems.
+  
+  Even if USE_MEMCPY is set to 1, loops to copy/clear small chunks
+  (of <= 36 bytes) are manually unrolled in realloc and calloc.
+*/
+
+#ifndef HAVE_MEMCPY
+#define HAVE_MEMCPY
+#endif
+
+#ifndef USE_MEMCPY
+#ifdef HAVE_MEMCPY
+#define USE_MEMCPY 1
+#else
+#define USE_MEMCPY 0
+#endif
+#endif
+
+
+#if (__STD_C || defined(HAVE_MEMCPY))
+
+#ifdef WIN32
+/* On Win32 memset and memcpy are already declared in windows.h */
+#else
+#if __STD_C
+void* memset(void*, int, size_t);
+void* memcpy(void*, const void*, size_t);
+#else
+Void_t* memset();
+Void_t* memcpy();
+#endif
+#endif
+#endif
+
+/*
+  MALLOC_FAILURE_ACTION is the action to take before "return 0" when
+  malloc fails to be able to return memory, either because memory is
+  exhausted or because of illegal arguments.
+  
+  By default, sets errno if running on STD_C platform, else does nothing.  
+*/
+
+#ifndef MALLOC_FAILURE_ACTION
+#if __STD_C
+#define MALLOC_FAILURE_ACTION \
+   errno = ENOMEM;
+
+#else
+#define MALLOC_FAILURE_ACTION
+#endif
+#endif
+
+/*
+  MORECORE-related declarations. By default, rely on sbrk
+*/
+
+
+#if !defined(HAVE_UNISTD_H)
+#if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__)
+#if __STD_C
+extern Void_t*     sbrk(ptrdiff_t);
+#else
+extern Void_t*     sbrk();
+#endif
+#endif
+#endif
+
+/*
+  MORECORE_FAILURE is the value returned upon failure of MORECORE
+  as well as mmap. Since it cannot be an otherwise valid memory address,
+  and must reflect values of standard sys calls, you probably ought not
+  try to redefine it.
+*/
+
+#ifndef MORECORE_FAILURE
+#define MORECORE_FAILURE ((void*)(-1UL))
+#endif
+
+/*
+  MORECORE is the name of the routine to call to obtain more memory
+  from the system.  See below for general guidance on writing
+  alternative MORECORE functions, as well as a version for WIN32 and a
+  sample version for pre-OSX macos.
+*/
+
+#ifndef MORECORE
+#define MORECORE sbrk
+#endif
+
+
+/*
+  If MORECORE_CONTIGUOUS is true, take advantage of fact that
+  consecutive calls to MORECORE with positive arguments always return
+  contiguous increasing addresses.  This is true of unix sbrk.  Even
+  if not defined, when regions happen to be contiguous, malloc will
+  permit allocations spanning regions obtained from different
+  calls. But defining this when applicable enables some stronger
+  consistency checks and space efficiencies. 
+*/
+
+#ifndef MORECORE_CONTIGUOUS
+#define MORECORE_CONTIGUOUS 1
+#endif
+
+/*
+  Define MORECORE_CANNOT_TRIM if your version of MORECORE
+  cannot release space back to the system when given negative
+  arguments. This is generally necessary only if you are using
+  a hand-crafted MORECORE function that cannot handle negative arguments.
+*/
+
+/* #define MORECORE_CANNOT_TRIM */
+
+
+/*
+  This malloc requires mmap for heap management data. It is an error
+  if mmap is not available.
+
+  Additionally, mmap will be used to satisfy large requests.
+*/
+
+#ifndef HAVE_MMAP
+#  ifdef SAMHAIN
+#    error mmap not available, use --disable-dnmalloc
+#  else
+#    error HAVE_MMAP not defined, has your operating system mmap?
+#  endif
+#endif
+
+/* 
+   Standard unix mmap using /dev/zero clears memory so calloc doesn't
+   need to.
+*/
+
+#ifndef MMAP_CLEARS
+#define MMAP_CLEARS 1
+#endif
+
+
+/* 
+   MMAP_AS_MORECORE_SIZE is the minimum mmap size argument to use if
+   sbrk fails, and mmap is used as a backup (which is done only if
+   HAVE_MMAP).  The value must be a multiple of page size.  This
+   backup strategy generally applies only when systems have "holes" in
+   address space, so sbrk cannot perform contiguous expansion, but
+   there is still space available on system.  On systems for which
+   this is known to be useful (i.e. most linux kernels), this occurs
+   only when programs allocate huge amounts of memory.  Between this,
+   and the fact that mmap regions tend to be limited, the size should
+   be large, to avoid too many mmap calls and thus avoid running out
+   of kernel resources.
+*/
+
+#ifndef MMAP_AS_MORECORE_SIZE
+#define MMAP_AS_MORECORE_SIZE (1024 * 1024)
+#endif
+
+
+/*
+  The system page size. To the extent possible, this malloc manages
+  memory from the system in page-size units.  Note that this value is
+  cached during initialization into a field of malloc_state. So even
+  if malloc_getpagesize is a function, it is only called once.
+
+  The following mechanics for getpagesize were adapted from bsd/gnu
+  getpagesize.h. If none of the system-probes here apply, a value of
+  4096 is used, which should be OK: If they don't apply, then using
+  the actual value probably doesn't impact performance.
+*/
+
+
+#ifndef malloc_getpagesize
+
+#  ifdef _SC_PAGESIZE         /* some SVR4 systems omit an underscore */
+#    ifndef _SC_PAGE_SIZE
+#      define _SC_PAGE_SIZE _SC_PAGESIZE
+#    endif
+#  endif
+
+#  ifdef _SC_PAGE_SIZE
+#    define malloc_getpagesize sysconf(_SC_PAGE_SIZE)
+#  else
+#    if defined(BSD) || defined(DGUX) || defined(HAVE_GETPAGESIZE)
+       extern size_t getpagesize();
+#      define malloc_getpagesize getpagesize()
+#    else
+#      ifdef WIN32 /* use supplied emulation of getpagesize */
+#        define malloc_getpagesize getpagesize() 
+#      else
+#        if defined(HAVE_SYS_PARAM_H)
+#          include <sys/param.h>
+#        endif
+#        ifdef EXEC_PAGESIZE
+#          define malloc_getpagesize EXEC_PAGESIZE
+#        else
+#          ifdef NBPG
+#            ifndef CLSIZE
+#              define malloc_getpagesize NBPG
+#            else
+#              define malloc_getpagesize (NBPG * CLSIZE)
+#            endif
+#          else
+#            ifdef NBPC
+#              define malloc_getpagesize NBPC
+#            else
+#              ifdef PAGESIZE
+#                define malloc_getpagesize PAGESIZE
+#              else /* just guess */
+#                define malloc_getpagesize (4096) 
+#              endif
+#            endif
+#          endif
+#        endif
+#      endif
+#    endif
+#  endif
+#endif
+
+/*
+  This version of malloc supports the standard SVID/XPG mallinfo
+  routine that returns a struct containing usage properties and
+  statistics. It should work on any SVID/XPG compliant system that has
+  a /usr/include/malloc.h defining struct mallinfo. (If you'd like to
+  install such a thing yourself, cut out the preliminary declarations
+  as described above and below and save them in a malloc.h file. But
+  there's no compelling reason to bother to do this.)
+
+  The main declaration needed is the mallinfo struct that is returned
+  (by-copy) by mallinfo().  The SVID/XPG mallinfo struct contains a
+  bunch of fields that are not even meaningful in this version of
+  malloc.  These fields are instead filled by mallinfo() with
+  other numbers that might be of interest.
+
+  HAVE_MALLOC_H should be set if you have a
+  /usr/include/malloc.h file that includes a declaration of struct
+  mallinfo.  If so, it is included; else an SVID2/XPG2 compliant
+  version is declared below.  These must be precisely the same for
+  mallinfo() to work.  The original SVID version of this struct,
+  defined on most systems with mallinfo, declares all fields as
+  ints. But some others define as unsigned long. If your system
+  defines the fields using a type of different width than listed here,
+  you must #include your system version and #define
+  HAVE_MALLOC_H.
+*/
+
+/* #define HAVE_MALLOC_H */
+
+/* On *BSD, malloc.h is deprecated, and on some *BSD including 
+ * it may actually raise an error.
+ */
+#if defined(HAVE_MALLOC_H) && !defined(__OpenBSD__) && !defined(__FreeBSD__) && !defined(__NetBSD__)
+#include <malloc.h>
+#else
+
+/* SVID2/XPG mallinfo structure */
+
+struct mallinfo {
+  int arena;    /* non-mmapped space allocated from system */
+  int ordblks;  /* number of free chunks */
+  int smblks;   /* number of fastbin blocks */
+  int hblks;    /* number of mmapped regions */
+  int hblkhd;   /* space in mmapped regions */
+  int usmblks;  /* maximum total allocated space */
+  int fsmblks;  /* space available in freed fastbin blocks */
+  int uordblks; /* total allocated space */
+  int fordblks; /* total free space */
+  int keepcost; /* top-most, releasable (via malloc_trim) space */
+};
+
+/*
+  SVID/XPG defines four standard parameter numbers for mallopt,
+  normally defined in malloc.h.  Only one of these (M_MXFAST) is used
+  in this malloc. The others (M_NLBLKS, M_GRAIN, M_KEEP) don't apply,
+  so setting them has no effect. But this malloc also supports other
+  options in mallopt described below.
+*/
+#endif
+
+
+/* ---------- description of public routines ------------ */
+
+/*
+  malloc(size_t n)
+  Returns a pointer to a newly allocated chunk of at least n bytes, or null
+  if no space is available. Additionally, on failure, errno is
+  set to ENOMEM on ANSI C systems.
+
+  If n is zero, malloc returns a minimum-sized chunk. (The minimum
+  size is 16 bytes on most 32bit systems, and 24 or 32 bytes on 64bit
+  systems.)  On most systems, size_t is an unsigned type, so calls
+  with negative arguments are interpreted as requests for huge amounts
+  of space, which will often fail. The maximum supported value of n
+  differs across systems, but is in all cases less than the maximum
+  representable value of a size_t.
+*/
+#if __STD_C
+Void_t*  public_mALLOc(size_t) __attribute_malloc__;
+#else
+Void_t*  public_mALLOc();
+#endif
+
+/*
+  free(Void_t* p)
+  Releases the chunk of memory pointed to by p, that had been previously
+  allocated using malloc or a related routine such as realloc.
+  It has no effect if p is null. It can have arbitrary (i.e., bad!)
+  effects if p has already been freed.
+
+  Unless disabled (using mallopt), freeing very large spaces will
+  when possible, automatically trigger operations that give
+  back unused memory to the system, thus reducing program footprint.
+*/
+#if __STD_C
+void     public_fREe(Void_t*);
+#else
+void     public_fREe();
+#endif
+
+/*
+  calloc(size_t n_elements, size_t element_size);
+  Returns a pointer to n_elements * element_size bytes, with all locations
+  set to zero.
+*/
+#if __STD_C
+Void_t*  public_cALLOc(size_t, size_t) __attribute_malloc__;
+#else
+Void_t*  public_cALLOc();
+#endif
+
+/*
+  realloc(Void_t* p, size_t n)
+  Returns a pointer to a chunk of size n that contains the same data
+  as does chunk p up to the minimum of (n, p's size) bytes, or null
+  if no space is available. 
+
+  The returned pointer may or may not be the same as p. The algorithm
+  prefers extending p when possible, otherwise it employs the
+  equivalent of a malloc-copy-free sequence.
+
+  If p is null, realloc is equivalent to malloc.  
+
+  If space is not available, realloc returns null, errno is set (if on
+  ANSI) and p is NOT freed.
+
+  If n is for fewer bytes than already held by p, the newly unused
+  space is lopped off and freed if possible.  Unless the #define
+  REALLOC_ZERO_BYTES_FREES is set, realloc with a size argument of
+  zero (re)allocates a minimum-sized chunk.
+
+  Large chunks that were internally obtained via mmap will always
+  be reallocated using malloc-copy-free sequences unless
+  the system supports MREMAP (currently only linux).
+
+  The old unix realloc convention of allowing the last-free'd chunk
+  to be used as an argument to realloc is not supported.
+*/
+#if __STD_C
+Void_t*  public_rEALLOc(Void_t*, size_t) __attribute_malloc__;
+#else
+Void_t*  public_rEALLOc();
+#endif
+
+/*
+  memalign(size_t alignment, size_t n);
+  Returns a pointer to a newly allocated chunk of n bytes, aligned
+  in accord with the alignment argument.
+
+  The alignment argument should be a power of two. If the argument is
+  not a power of two, the nearest greater power is used.
+  8-byte alignment is guaranteed by normal malloc calls, so don't
+  bother calling memalign with an argument of 8 or less.
+
+  Overreliance on memalign is a sure way to fragment space.
+*/
+#if __STD_C
+Void_t*  public_mEMALIGn(size_t, size_t) __attribute_malloc__;
+#else
+Void_t*  public_mEMALIGn();
+#endif
+
+/*
+  posix_memalign(void** memptr, size_t alignment, size_t n);
+  Sets *memptr to the address of a newly allocated chunk of n bytes, aligned
+  in accord with the alignment argument. Returns 0 on success, otherwise
+  an error (EINVAL for incorrect alignment, ENOMEM for out of memory).
+
+  The alignment must be a power of two, and a multiple of sizeof(void *).
+*/
+#if __STD_C
+int public_posix_mEMALIGn(Void_t**, size_t, size_t);
+#else
+int public_posix_mEMALIGn();
+#endif
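+
+/* Usage sketch (illustrative only); 64 is a power of two and a
+ * multiple of sizeof(void *), as required:
+ *
+ *   void *p = NULL;
+ *   if (posix_memalign(&p, 64, 1024) == 0) {
+ *     ... use the 64-byte-aligned buffer p ...
+ *     free(p);
+ *   }
+ */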
+
+/*
+  valloc(size_t n);
+  Equivalent to memalign(pagesize, n), where pagesize is the page
+  size of the system. If the pagesize is unknown, 4096 is used.
+*/
+#if __STD_C
+Void_t*  public_vALLOc(size_t) __attribute_malloc__;
+#else
+Void_t*  public_vALLOc();
+#endif
+
+
+
+/*
+  mallopt(int parameter_number, int parameter_value)
+  Sets tunable parameters. The format is to provide a
+  (parameter-number, parameter-value) pair.  mallopt then sets the
+  corresponding parameter to the argument value if it can (i.e., so
+  long as the value is meaningful), and returns 1 if successful else
+  0.  SVID/XPG/ANSI defines four standard param numbers for mallopt,
+  normally defined in malloc.h.  Only one of these (M_MXFAST) is used
+  in this malloc. The others (M_NLBLKS, M_GRAIN, M_KEEP) don't apply,
+  so setting them has no effect. But this malloc also supports four
+  other options in mallopt. See below for details.  Briefly, supported
+  parameters are as follows (listed defaults are for "typical"
+  configurations).
+
+  Symbol            param #   default    allowed param values
+  M_MXFAST          1         64         0-80  (0 disables fastbins)
+  M_TRIM_THRESHOLD -1         256*1024   any   (-1U disables trimming)
+  M_TOP_PAD        -2         0          any  
+  M_MMAP_THRESHOLD -3         256*1024   any   (or 0 if no MMAP support)
+  M_MMAP_MAX       -4         65536      any   (0 disables use of mmap)
+*/
+#if __STD_C
+int      public_mALLOPt(int, int);
+#else
+int      public_mALLOPt();
+#endif
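+
+/* Usage sketch (illustrative only), following the table above: raise
+ * the fastbin limit to its maximum and disable trimming; each call
+ * returns 1 on success, 0 otherwise.
+ *
+ *   mallopt(M_MXFAST, 80);
+ *   mallopt(M_TRIM_THRESHOLD, -1);
+ */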
+
+
+/*
+  mallinfo()
+  Returns (by copy) a struct containing various summary statistics:
+
+  arena:     current total non-mmapped bytes allocated from system 
+  ordblks:   the number of free chunks 
+  smblks:    the number of fastbin blocks (i.e., small chunks that
+               have been freed but not yet reused or consolidated)
+  hblks:     current number of mmapped regions 
+  hblkhd:    total bytes held in mmapped regions 
+  usmblks:   the maximum total allocated space. This will be greater
+                than current total if trimming has occurred.
+  fsmblks:   total bytes held in fastbin blocks 
+  uordblks:  current total allocated space (normal or mmapped)
+  fordblks:  total free space 
+  keepcost:  the maximum number of bytes that could ideally be released
+               back to system via malloc_trim. ("ideally" means that
+               it ignores page restrictions etc.)
+
+  Because these fields are ints, but internal bookkeeping may
+  be kept as longs, the reported values may wrap around zero and 
+  thus be inaccurate.
+*/
+#if __STD_C
+struct mallinfo public_mALLINFo(void);
+#else
+struct mallinfo public_mALLINFo();
+#endif
+
+/*
+  pvalloc(size_t n);
+  Equivalent to valloc(minimum-page-that-holds(n)), that is,
+  round up n to nearest pagesize.
+ */
+#if __STD_C
+Void_t*  public_pVALLOc(size_t) __attribute_malloc__;
+#else
+Void_t*  public_pVALLOc();
+#endif
+
+/*
+  malloc_trim(size_t pad);
+
+  If possible, gives memory back to the system (via negative
+  arguments to sbrk) if there is unused memory at the `high' end of
+  the malloc pool. You can call this after freeing large blocks of
+  memory to potentially reduce the system-level memory requirements
+  of a program. However, it cannot guarantee to reduce memory. Under
+  some allocation patterns, some large free blocks of memory will be
+  locked between two used chunks, so they cannot be given back to
+  the system.
+  
+  The `pad' argument to malloc_trim represents the amount of free
+  trailing space to leave untrimmed. If this argument is zero,
+  only the minimum amount of memory to maintain internal data
+  structures will be left (one page or less). Non-zero arguments
+  can be supplied to maintain enough trailing space to service
+  future expected allocations without having to re-obtain memory
+  from the system.
+  
+  Malloc_trim returns 1 if it actually released any memory, else 0.
+  On systems that do not support "negative sbrks", it will always
+  return 0.
+*/
+#if __STD_C
+int      public_mTRIm(size_t);
+#else
+int      public_mTRIm();
+#endif
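+
+/* E.g., calling malloc_trim(0) after freeing large blocks releases all
+ * untrimmed trailing space except the minimum kept for internal data
+ * structures (illustrative; as noted above, release is not always
+ * possible). */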
+
+/*
+  malloc_usable_size(Void_t* p);
+
+  Returns the number of bytes you can actually use in
+  an allocated chunk, which may be more than you requested (although
+  often not) due to alignment and minimum size constraints.
+  You can use this many bytes without worrying about
+  overwriting other allocated objects. This is not a particularly great
+  programming practice. malloc_usable_size can be more useful in
+  debugging and assertions, for example:
+
+  p = malloc(n);
+  assert(malloc_usable_size(p) >= 256);
+
+*/
+#if __STD_C
+size_t   public_mUSABLe(Void_t*);
+#else
+size_t   public_mUSABLe();
+#endif
+
+/*
+  malloc_stats();
+  Prints on stderr the amount of space obtained from the system (both
+  via sbrk and mmap), the maximum amount (which may be more than
+  current if malloc_trim and/or munmap got called), and the current
+  number of bytes allocated via malloc (or realloc, etc) but not yet
+  freed. Note that this is the number of bytes allocated, not the
+  number requested. It will be larger than the number requested
+  because of alignment and bookkeeping overhead. Because it includes
+  alignment wastage as being in use, this figure may be greater than
+  zero even when no user-level chunks are allocated.
+
+  The reported current and maximum system memory can be inaccurate if
+  a program makes other calls to system memory allocation functions
+  (normally sbrk) outside of malloc.
+
+  malloc_stats prints only the most commonly interesting statistics.
+  More information can be obtained by calling mallinfo.
+
+*/
+#if __STD_C
+void     public_mSTATs(void);
+#else
+void     public_mSTATs();
+#endif
+
+/* mallopt tuning options */
+
+/*
+  M_MXFAST is the maximum request size used for "fastbins", special bins
+  that hold returned chunks without consolidating their spaces. This
+  enables future requests for chunks of the same size to be handled
+  very quickly, but can increase fragmentation, and thus increase the
+  overall memory footprint of a program.
+
+  This malloc manages fastbins very conservatively yet still
+  efficiently, so fragmentation is rarely a problem for values less
+  than or equal to the default.  The maximum supported value of MXFAST
+  is 80. You wouldn't want it any higher than this anyway.  Fastbins
+  are designed especially for use with many small structs, objects or
+  strings -- the default handles structs/objects/arrays with sizes up
+  to 16 4byte fields, or small strings representing words, tokens,
+  etc. Using fastbins for larger objects normally worsens
+  fragmentation without improving speed.
+
+  M_MXFAST is set in REQUEST size units. It is internally used in
+  chunksize units, which adds padding and alignment.  You can reduce
+  M_MXFAST to 0 to disable all use of fastbins.  This causes the malloc
+  algorithm to be a closer approximation of fifo-best-fit in all cases,
+  not just for larger requests, but will generally cause it to be
+  slower.
+*/
+
+
+/* M_MXFAST is a standard SVID/XPG tuning option, usually listed in malloc.h */
+#ifndef M_MXFAST
+#define M_MXFAST            1    
+#endif
+
+#ifndef DEFAULT_MXFAST
+#define DEFAULT_MXFAST     64
+#endif
+
+
+/*
+  M_TRIM_THRESHOLD is the maximum amount of unused top-most memory
+  to keep before releasing via malloc_trim in free().
+
+  Automatic trimming is mainly useful in long-lived programs.
+  Because trimming via sbrk can be slow on some systems, and can
+  sometimes be wasteful (in cases where programs immediately
+  afterward allocate more large chunks) the value should be high
+  enough so that your overall system performance would improve by
+  releasing this much memory.
+
+  The trim threshold and the mmap control parameters (see below)
+  can be traded off with one another. Trimming and mmapping are
+  two different ways of releasing unused memory back to the
+  system. Between these two, it is often possible to keep
+  system-level demands of a long-lived program down to a bare
+  minimum. For example, in one test suite of sessions measuring
+  the XF86 X server on Linux, using a trim threshold of 128K and a
+  mmap threshold of 192K led to near-minimal long term resource
+  consumption.
+
+  If you are using this malloc in a long-lived program, it should
+  pay to experiment with these values.  As a rough guide, you
+  might set to a value close to the average size of a process
+  (program) running on your system.  Releasing this much memory
+  would allow such a process to run in memory.  Generally, it's
+  worth it to tune for trimming rather than memory mapping when a
+  program undergoes phases where several large chunks are
+  allocated and released in ways that can reuse each other's
+  storage, perhaps mixed with phases where there are no such
+  chunks at all.  And in well-behaved long-lived programs,
+  controlling release of large blocks via trimming versus mapping
+  is usually faster.
+
+  However, in most programs, these parameters serve mainly as
+  protection against the system-level effects of carrying around
+  massive amounts of unneeded memory. Since frequent calls to
+  sbrk, mmap, and munmap otherwise degrade performance, the default
+  parameters are set to relatively high values that serve only as
+  safeguards.
+
+  The trim value must be greater than page size to have any useful
+  effect.  To disable trimming completely, you can set to 
+  (unsigned long)(-1)
+
+  Trim settings interact with fastbin (MXFAST) settings: Unless
+  TRIM_FASTBINS is defined, automatic trimming never takes place upon
+  freeing a chunk with size less than or equal to MXFAST. Trimming is
+  instead delayed until subsequent freeing of larger chunks. However,
+  you can still force an attempted trim by calling malloc_trim.
+
+  Also, trimming is not generally possible in cases where
+  the main arena is obtained via mmap.
+
+  Note that the trick some people use of mallocing a huge space and
+  then freeing it at program startup, in an attempt to reserve system
+  memory, doesn't have the intended effect under automatic trimming,
+  since that memory will immediately be returned to the system.
+*/
+
+#define M_TRIM_THRESHOLD       -1
+
+#ifndef DEFAULT_TRIM_THRESHOLD
+#define DEFAULT_TRIM_THRESHOLD (256 * 1024)
+#endif
+
+/*
+  M_TOP_PAD is the amount of extra `padding' space to allocate or
+  retain whenever sbrk is called. It is used in two ways internally:
+
+  * When sbrk is called to extend the top of the arena to satisfy
+  a new malloc request, this much padding is added to the sbrk
+  request.
+
+  * When malloc_trim is called automatically from free(),
+  it is used as the `pad' argument.
+
+  In both cases, the actual amount of padding is rounded
+  so that the end of the arena is always a system page boundary.
+
+  The main reason for using padding is to avoid calling sbrk so
+  often. Having even a small pad greatly reduces the likelihood
+  that nearly every malloc request during program start-up (or
+  after trimming) will invoke sbrk, which needlessly wastes
+  time.
+
+  Automatic rounding-up to page-size units is normally sufficient
+  to avoid measurable overhead, so the default is 0.  However, in
+  systems where sbrk is relatively slow, it can pay to increase
+  this value, at the expense of carrying around more memory than
+  the program needs.
+*/
+
+#define M_TOP_PAD              -2
+
+#ifndef DEFAULT_TOP_PAD
+#define DEFAULT_TOP_PAD        (0)
+#endif
+
+/*
+  M_MMAP_THRESHOLD is the request size threshold for using mmap()
+  to service a request. Requests of at least this size that cannot
+  be allocated using already-existing space will be serviced via mmap.
+  (If enough normal freed space already exists it is used instead.)
+
+  Using mmap segregates relatively large chunks of memory so that
+  they can be individually obtained and released from the host
+  system. A request serviced through mmap is never reused by any
+  other request (at least not directly; the system may just so
+  happen to remap successive requests to the same locations).
+
+  Segregating space in this way has the benefits that:
+
+   1. Mmapped space can ALWAYS be individually released back 
+      to the system, which helps keep the system level memory 
+      demands of a long-lived program low. 
+   2. Mapped memory can never become `locked' between
+      other chunks, as can happen with normally allocated chunks, which
+      means that even trimming via malloc_trim would not release them.
+   3. On some systems with "holes" in address spaces, mmap can obtain
+      memory that sbrk cannot.
+
+  However, it has the disadvantages that:
+
+   1. The space cannot be reclaimed, consolidated, and then
+      used to service later requests, as happens with normal chunks.
+   2. It can lead to more wastage because of mmap page alignment
+      requirements.
+   3. It causes malloc performance to be more dependent on host
+      system memory management support routines which may vary in
+      implementation quality and may impose arbitrary
+      limitations. Generally, servicing a request via normal
+      malloc steps is faster than going through a system's mmap.
+
+  The advantages of mmap nearly always outweigh disadvantages for
+  "large" chunks, but the value of "large" varies across systems.  The
+  default is an empirically derived value that works well in most
+  systems.
+*/
+
+#define M_MMAP_THRESHOLD      -3
+
+#ifndef DEFAULT_MMAP_THRESHOLD
+#define DEFAULT_MMAP_THRESHOLD (256 * 1024)
+#endif
+
+/*
+  M_MMAP_MAX is the maximum number of requests to simultaneously
+  service using mmap. This parameter exists because some systems have
+  a limited number of internal tables for use by mmap, and using more
+  than a few of them may degrade performance.
+
+  The default is set to a value that serves only as a safeguard.
+  Setting to 0 disables use of mmap for servicing large requests.  If
+  HAVE_MMAP is not set, the default value is 0, and attempts to set it
+  to non-zero values in mallopt will fail.
+*/
+
+#define M_MMAP_MAX             -4
+
+#ifndef DEFAULT_MMAP_MAX
+#define DEFAULT_MMAP_MAX       (65536)
+#endif
+
+#ifdef __cplusplus
+}  /* end of extern "C" */
+#endif
+
+/* 
+  ========================================================================
+  To make a fully customizable malloc.h header file, cut everything
+  above this line, put into file malloc.h, edit to suit, and #include it 
+  on the next line, as well as in programs that use this malloc.
+  ========================================================================
+*/
+
+/* #include "malloc.h" */
+
+/* --------------------- public wrappers ---------------------- */
+
+#ifdef USE_PUBLIC_MALLOC_WRAPPERS
+
+/* DL_STATIC used to make functions (deep down) consistent
+ * with prototypes (otherwise the prototypes are static
+ * with USE_PUBLIC_MALLOC_WRAPPERS, but the functions aren't).
+ * The gcc compiler doesn't care, but the HP-UX compiler does.
+ */
+#define DL_STATIC static
+
+/* Declare all routines as internal */
+#if __STD_C
+static Void_t*  mALLOc(size_t) __attribute_malloc__;
+static void     fREe(Void_t*);
+static Void_t*  rEALLOc(Void_t*, size_t) __attribute_malloc__;
+static Void_t*  mEMALIGn(size_t, size_t) __attribute_malloc__;
+static int      posix_mEMALIGn(Void_t**, size_t, size_t);
+static Void_t*  vALLOc(size_t) __attribute_malloc__;
+static Void_t*  pVALLOc(size_t) __attribute_malloc__;
+static Void_t*  cALLOc(size_t, size_t) __attribute_malloc__;
+static int      mTRIm(size_t);
+static size_t   mUSABLe(Void_t*);
+static void     mSTATs(void);
+static int      mALLOPt(int, int);
+static struct mallinfo mALLINFo(void);
+#else
+static Void_t*  mALLOc();
+static void     fREe();
+static Void_t*  rEALLOc();
+static Void_t*  mEMALIGn();
+static int      posix_mEMALIGn();
+static Void_t*  vALLOc();
+static Void_t*  pVALLOc();
+static Void_t*  cALLOc();
+static int      mTRIm();
+static size_t   mUSABLe();
+static void     mSTATs();
+static int      mALLOPt();
+static struct mallinfo mALLINFo();
+#endif
+
+/*
+  MALLOC_PREACTION and MALLOC_POSTACTION should be
+  defined to return 0 on success, and nonzero on failure.
+  The return value of MALLOC_POSTACTION is currently ignored
+  in wrapper functions since there is no reasonable default
+  action to take on failure.
+*/
+
+
+#ifdef USE_MALLOC_LOCK
+
+# ifdef WIN32
+
+static int mALLOC_MUTEx;
+#define MALLOC_PREACTION   slwait(&mALLOC_MUTEx)
+#define MALLOC_POSTACTION  slrelease(&mALLOC_MUTEx)
+int dnmalloc_pthread_init(void) { return 0; }
+
+# elif defined(__NetBSD__) || defined(__OpenBSD__) || defined(__FreeBSD__)
+
+#   if defined(__NetBSD__)
+#include <reentrant.h>
+extern int __isthreaded;
+static mutex_t thread_lock = MUTEX_INITIALIZER;
+#define _MALLOC_LOCK()   if (__isthreaded) mutex_lock(&thread_lock)
+#define _MALLOC_UNLOCK() if (__isthreaded) mutex_unlock(&thread_lock)
+void _malloc_prefork(void) {  _MALLOC_LOCK(); }
+void _malloc_postfork(void) { _MALLOC_UNLOCK(); }
+#   endif
+
+#   if defined(__OpenBSD__)
+extern int  __isthreaded;
+void   _thread_malloc_lock(void);
+void   _thread_malloc_unlock(void);
+#define _MALLOC_LOCK()           if (__isthreaded) _thread_malloc_lock()
+#define _MALLOC_UNLOCK()         if (__isthreaded) _thread_malloc_unlock()
+#   endif
+
+#   if defined(__FreeBSD__)
+extern int      __isthreaded;
+struct _spinlock {
+	volatile long	access_lock;
+	volatile long	lock_owner;
+	volatile char	*fname;
+	volatile int	lineno;
+};
+typedef struct _spinlock spinlock_t;
+#define	_SPINLOCK_INITIALIZER	{ 0, 0, 0, 0 }
+void	_spinlock(spinlock_t *);
+void	_spinunlock(spinlock_t *);
+/* # include "/usr/src/lib/libc/include/spinlock.h" */
+static spinlock_t thread_lock   = _SPINLOCK_INITIALIZER;
+spinlock_t *__malloc_lock       = &thread_lock;
+#define _MALLOC_LOCK()           if (__isthreaded) _spinlock(&thread_lock)
+#define _MALLOC_UNLOCK()         if (__isthreaded) _spinunlock(&thread_lock)
+#   endif
+
+/* Common for all three *BSD
+ */
+static int malloc_active = 0;
+static int dnmalloc_mutex_lock(void)
+{
+  _MALLOC_LOCK();
+  if (!malloc_active)
+    {
+      ++malloc_active;
+      return 0;
+    }
+  assert(malloc_active == 0);
+  _MALLOC_UNLOCK();
+  errno = EDEADLK;
+  return 1;
+}
+static int dnmalloc_mutex_unlock(void)
+{
+  --malloc_active;
+  _MALLOC_UNLOCK();
+  return 0;
+}
+#define MALLOC_PREACTION   dnmalloc_mutex_lock()
+#define MALLOC_POSTACTION  dnmalloc_mutex_unlock()
+int dnmalloc_pthread_init(void) { return 0; }
+
+# else
+
+/* Wrapping malloc with pthread_mutex_lock/pthread_mutex_unlock
+ *
+ * Works fine on linux (no malloc in pthread_mutex_lock)
+ * Works on HP-UX if initialized after entering main()
+ */ 
+#include <pthread.h>
+static int malloc_active      = 0;
+void dnmalloc_fork_prepare(void);
+void dnmalloc_fork_parent(void);
+void dnmalloc_fork_child(void);
+
+#if !defined(__linux__)
+
+static pthread_mutex_t mALLOC_MUTEx;
+pthread_once_t dnmalloc_once_control = PTHREAD_ONCE_INIT;
+static int dnmalloc_use_mutex = 0;
+static void dnmalloc_pthread_init_int(void)
+{
+  pthread_mutexattr_t   mta;
+  pthread_mutexattr_init(&mta);
+  pthread_mutexattr_settype(&mta, PTHREAD_MUTEX_RECURSIVE);
+  pthread_mutex_init(&(mALLOC_MUTEx), &mta);
+  pthread_mutexattr_destroy(&mta);       
+  pthread_atfork(dnmalloc_fork_prepare, 
+		 dnmalloc_fork_parent,
+		 dnmalloc_fork_child);
+  dnmalloc_use_mutex = 1;
+}
+int dnmalloc_pthread_init(void)
+{
+  return pthread_once(&dnmalloc_once_control, dnmalloc_pthread_init_int);
+}
+
+#else
+
+static pthread_mutex_t mALLOC_MUTEx = PTHREAD_MUTEX_INITIALIZER;
+static int dnmalloc_use_mutex = 1;
+int dnmalloc_pthread_init(void) { 
+  return pthread_atfork(dnmalloc_fork_prepare, 
+			dnmalloc_fork_parent,
+			dnmalloc_fork_child); 
+}
+#endif /* !defined(__linux__) */
+
+void dnmalloc_fork_prepare(void) { 
+  if (dnmalloc_use_mutex) 
+    pthread_mutex_lock(&mALLOC_MUTEx);
+}
+void dnmalloc_fork_parent(void) { 
+  if (dnmalloc_use_mutex)
+    pthread_mutex_unlock(&mALLOC_MUTEx); 
+}
+void dnmalloc_fork_child(void) { 
+#ifdef __GLIBC__
+  if (dnmalloc_use_mutex)
+    pthread_mutex_init(&mALLOC_MUTEx, NULL); 
+#else
+  if (dnmalloc_use_mutex)
+    pthread_mutex_unlock(&mALLOC_MUTEx); 
+#endif
+}
+static int dnmalloc_mutex_lock(pthread_mutex_t *mutex)
+{
+  if (dnmalloc_use_mutex)
+    {
+      int rc = pthread_mutex_lock(mutex);
+      if (rc == 0)
+	{
+	  if (!malloc_active)
+	    {
+	      ++malloc_active;
+	      return 0;
+	    }
+	  assert(malloc_active == 0);
+	  (void) pthread_mutex_unlock(mutex);
+	  errno = EDEADLK;
+	  return 1;
+	}
+      return rc;
+    }
+  return 0;
+}
+static int dnmalloc_mutex_unlock(pthread_mutex_t *mutex)
+{
+  if (dnmalloc_use_mutex)
+    {
+      --malloc_active;
+      return pthread_mutex_unlock(mutex);
+    }
+  return 0;
+}
+# define MALLOC_PREACTION   dnmalloc_mutex_lock(&mALLOC_MUTEx)
+# define MALLOC_POSTACTION  dnmalloc_mutex_unlock(&mALLOC_MUTEx)
+
+# endif
+
+#else
+
+/* Substitute anything you like for these */
+
+# define MALLOC_PREACTION   (0)
+# define MALLOC_POSTACTION  (0)
+int dnmalloc_pthread_init(void) { return 0; }
+
+#endif /* USE_MALLOC_LOCK */
+
+Void_t* public_mALLOc(size_t bytes) {
+  Void_t* m;
+  if (MALLOC_PREACTION == 0) {
+    m = mALLOc(bytes);
+    (void) MALLOC_POSTACTION;
+    return m;
+  }
+  return 0;
+}
+
+void public_fREe(Void_t* m) {
+  if (MALLOC_PREACTION == 0) {
+    fREe(m);
+    (void) MALLOC_POSTACTION;
+  }
+}
+
+Void_t* public_rEALLOc(Void_t* m, size_t bytes) {
+  if (MALLOC_PREACTION == 0) {
+    m = rEALLOc(m, bytes);
+    (void) MALLOC_POSTACTION;
+    return m;
+  }
+  return 0;
+}
+
+Void_t* public_mEMALIGn(size_t alignment, size_t bytes) {
+  Void_t* m;
+  if (MALLOC_PREACTION == 0) {
+    m = mEMALIGn(alignment, bytes);
+    (void) MALLOC_POSTACTION;
+    return m;
+  }
+  return 0;
+}
+
+int public_posix_mEMALIGn(Void_t**memptr, size_t alignment, size_t bytes) {
+  int m, ret;
+  if ((ret = MALLOC_PREACTION) == 0) {
+    m = posix_mEMALIGn(memptr, alignment, bytes);
+    (void) MALLOC_POSTACTION;
+    return m;
+  }
+  return ret;
+}
+
+Void_t* public_vALLOc(size_t bytes) {
+  Void_t* m;
+  if (MALLOC_PREACTION == 0) {
+    m = vALLOc(bytes);
+    (void) MALLOC_POSTACTION;
+    return m;
+  }
+  return 0;
+}
+
+Void_t* public_pVALLOc(size_t bytes) {
+  Void_t* m;
+  if (MALLOC_PREACTION == 0) {
+    m = pVALLOc(bytes);
+    (void) MALLOC_POSTACTION;
+    return m;
+  }
+  return 0;
+}
+
+Void_t* public_cALLOc(size_t n, size_t elem_size) {
+  Void_t* m;
+  if (MALLOC_PREACTION == 0) {
+    m = cALLOc(n, elem_size);
+    (void) MALLOC_POSTACTION;
+    return m;
+  }
+  return 0;
+}
+
+int public_mTRIm(size_t s) {
+  int result;
+  if (MALLOC_PREACTION == 0) {
+    result = mTRIm(s);
+    (void) MALLOC_POSTACTION;
+    return result;
+  }
+  return 0;
+}
+
+size_t public_mUSABLe(Void_t* m) {
+  size_t result;
+  if (MALLOC_PREACTION == 0) {
+    result = mUSABLe(m);
+    (void) MALLOC_POSTACTION;
+    return result;
+  }
+  return 0;
+}
+
+void public_mSTATs() {
+  if (MALLOC_PREACTION == 0) {
+    mSTATs();
+    (void) MALLOC_POSTACTION;
+  }
+}
+
+struct mallinfo public_mALLINFo() {
+  struct mallinfo m;
+  if (MALLOC_PREACTION == 0) {
+    m = mALLINFo();
+    (void) MALLOC_POSTACTION;
+    return m;
+  } else {
+    struct mallinfo nm = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+    return nm;
+  }
+}
+
+int public_mALLOPt(int p, int v) {
+  int result;
+  if (MALLOC_PREACTION == 0) {
+    result = mALLOPt(p, v);
+    (void) MALLOC_POSTACTION;
+    return result;
+  }
+  return 0;
+}
+
+#else
+
+int dnmalloc_pthread_init(void) { return 0; }
+#define DL_STATIC
+
+#endif /* USE_PUBLIC_MALLOC_WRAPPERS */
+
+
+
+/* ------------- Optional versions of memcopy ---------------- */
+
+
+#if USE_MEMCPY
+
+/* 
+  Note: memcpy is ONLY invoked with non-overlapping regions,
+  so the (usually slower) memmove is not needed.
+*/
+
+#define MALLOC_COPY(dest, src, nbytes)  memcpy(dest, src, nbytes)
+#define MALLOC_ZERO(dest, nbytes)       memset(dest, 0,   nbytes)
+
+#else /* !USE_MEMCPY */
+
+/* Use Duff's device for good zeroing/copying performance. */
+
+#define MALLOC_ZERO(charp, nbytes)                                            \
+do {                                                                          \
+  INTERNAL_SIZE_T* mzp = (INTERNAL_SIZE_T*)(charp);                           \
+  CHUNK_SIZE_T  mctmp = (nbytes)/sizeof(INTERNAL_SIZE_T);                     \
+  long mcn;                                                                   \
+  if (mctmp < 8) mcn = 0; else { mcn = (mctmp-1)/8; mctmp %= 8; }             \
+  switch (mctmp) {                                                            \
+    case 0: for(;;) { *mzp++ = 0;                                             \
+    case 7:           *mzp++ = 0;                                             \
+    case 6:           *mzp++ = 0;                                             \
+    case 5:           *mzp++ = 0;                                             \
+    case 4:           *mzp++ = 0;                                             \
+    case 3:           *mzp++ = 0;                                             \
+    case 2:           *mzp++ = 0;                                             \
+    case 1:           *mzp++ = 0; if(mcn <= 0) break; mcn--; }                \
+  }                                                                           \
+} while(0)
+
+#define MALLOC_COPY(dest,src,nbytes)                                          \
+do {                                                                          \
+  INTERNAL_SIZE_T* mcsrc = (INTERNAL_SIZE_T*) src;                            \
+  INTERNAL_SIZE_T* mcdst = (INTERNAL_SIZE_T*) dest;                           \
+  CHUNK_SIZE_T  mctmp = (nbytes)/sizeof(INTERNAL_SIZE_T);                     \
+  long mcn;                                                                   \
+  if (mctmp < 8) mcn = 0; else { mcn = (mctmp-1)/8; mctmp %= 8; }             \
+  switch (mctmp) {                                                            \
+    case 0: for(;;) { *mcdst++ = *mcsrc++;                                    \
+    case 7:           *mcdst++ = *mcsrc++;                                    \
+    case 6:           *mcdst++ = *mcsrc++;                                    \
+    case 5:           *mcdst++ = *mcsrc++;                                    \
+    case 4:           *mcdst++ = *mcsrc++;                                    \
+    case 3:           *mcdst++ = *mcsrc++;                                    \
+    case 2:           *mcdst++ = *mcsrc++;                                    \
+    case 1:           *mcdst++ = *mcsrc++; if(mcn <= 0) break; mcn--; }       \
+  }                                                                           \
+} while(0)
+
+#endif
+
+/* ------------------ MMAP support ------------------  */
+
+
+#if defined(HAVE_FCNTL_H)
+#include <fcntl.h>
+#endif
+
+#if defined(HAVE_SYS_MMAN_H)
+#include <sys/mman.h>
+#endif
+
+#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
+#define MAP_ANONYMOUS MAP_ANON
+#endif
+
+/* 
+   Nearly all versions of mmap support MAP_ANONYMOUS, 
+   so the following is unlikely to be needed, but is
+   supplied just in case.
+*/
+
+#ifndef MAP_ANONYMOUS
+
+/* rw 19.05.2008 changed to avoid cached file descriptor, untested 
+ */
+void * anon_mmap (void *addr, size_t length, int prot, int flags)
+{
+  void * retval   = NULL;
+  int dev_zero_fd = -1; /* File descriptor for /dev/zero. */
+
+  dev_zero_fd = open("/dev/zero", O_RDWR);
+  if (dev_zero_fd >= 0)
+    {
+      retval = mmap(addr, length, prot, flags, dev_zero_fd, 0);
+      /* closing the file descriptor does not unmap the region */
+      close(dev_zero_fd); 
+    }
+  return retval;
+}
+  
+#define MMAP(addr, size, prot, flags) \
+  (anon_mmap((addr), (size), (prot), (flags)))
+
+
+#else /* have MAP_ANONYMOUS */
+
+#if !defined(MAP_32BIT) && defined(MAP_ADDR32)
+#define MAP_32BIT MAP_ADDR32
+#endif
+
+#if defined(MAP_32BIT)
+#define MMAP(addr, size, prot, flags) \
+ (mmap((addr), (size), (prot), (flags)|MAP_ANONYMOUS|MAP_32BIT, -1, 0))
+#elif defined(__sun)
+/* 
+ * Hint an address within 32bit address space
+ */
+#define MMAP(addr, size, prot, flags) \
+ (mmap((void*)0xC0000000, (size), (prot), (flags)|MAP_ANONYMOUS, -1, 0))
+#else
+/* *BSD */
+#define MMAP(addr, size, prot, flags) \
+ (mmap((void*)0x80000000, (size), (prot), (flags)|MAP_ANONYMOUS, -1, 0))
+#endif
+
+#endif /* have MAP_ANONYMOUS */
+
+
+/*
+  -----------------------  Chunk representations -----------------------
+*/
+
+typedef void * mchunkptr;
+
+struct chunkinfo {
+   INTERNAL_SIZE_T      prev_size;  /* Size of previous chunk in bytes.   */
+   INTERNAL_SIZE_T      size;       /* Size in bytes, including overhead. */
+   INTERNAL_SIZE_T      req;        /* Original request size, for guard.  */
+   struct chunkinfo* hash_next;     /* next chunkinfo in the linked list
+                                       of chunks that hash to the same
+                                       hashtable entry                    */
+   struct chunkinfo* fd;	    /* double links -- used only if free. */
+   struct chunkinfo* bk;
+   mchunkptr chunk;
+};
+
+typedef struct chunkinfo* chunkinfoptr;
+
+struct cireginfo {
+	unsigned long position;
+	unsigned long *freebitmap;
+	struct cireginfo* next;
+	struct chunkinfo *freelist;
+	struct chunkinfo *begin; 
+	unsigned long freecounter; 
+};
+
+/*
+  ---------- Size and alignment checks and conversions ----------
+*/
+
+/* conversion from malloc headers to user pointers, and back */
+#define chunk(p) ((p)->chunk)
+
+
+#define chunk2mem(p)   (chunk(p))
+#define mem2chunk(mem) (hashtable_lookup(mem))
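+
+/* Note: unlike dlmalloc, dnmalloc keeps chunk metadata out of band.
+ * A user pointer is mapped back to its chunkinfo through the
+ * hashtable rather than by pointer arithmetic, so heap overflows
+ * cannot corrupt the allocator's headers.
+ */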
+
+/* The smallest possible chunk */
+#define MIN_CHUNK_SIZE        16
+
+/* The smallest size we can malloc is an aligned minimal chunk */
+
+#define MINSIZE  \
+  (CHUNK_SIZE_T)(((MIN_CHUNK_SIZE+MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK))
+
+/* Check if m has acceptable alignment */
+
+#define aligned_OK(m)  (((PTR_UINT)((m)) & (MALLOC_ALIGN_MASK)) == 0)
+
+#define GUARD_SIZE 4
+
+/* 
+   Check if a request is so large that it would wrap around zero when
+   padded and aligned. To simplify some other code, the bound is made
+   low enough so that adding MINSIZE will also not wrap around zero.
+
+   Make it 4*MINSIZE.
+*/
+
+#define REQUEST_OUT_OF_RANGE(req)                                 \
+  ((CHUNK_SIZE_T)(req) >=                                         \
+   (CHUNK_SIZE_T)(INTERNAL_SIZE_T)(-4 * MINSIZE))    
+
+/* pad request bytes into a usable size -- internal version */
+
+#define request2size(req)                                         \
+  (((req) + GUARD_SIZE + MALLOC_ALIGN_MASK >= MINSIZE)  ?         \
+   ((req) + GUARD_SIZE + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK :\
+   MINSIZE)
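+
+/* Worked example, assuming 8-byte alignment (MALLOC_ALIGN_MASK == 7)
+ * and GUARD_SIZE == 4: request2size(20) = (20 + 4 + 7) & ~7 = 24,
+ * which leaves 24 - 20 = 4 bytes behind the user data for the guard.
+ */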
+
+/*  Same, except also perform argument check */
+
+#define checked_request2size(req, sz)                             \
+  if (!REQUEST_OUT_OF_RANGE(req)) {                               \
+    (sz) = request2size(req);                                     \
+    assert((sz-req) >= GUARD_SIZE);                               \
+  } else {                                                        \
+    MALLOC_FAILURE_ACTION;                                        \
+    return 0;                                                     \
+  }
+
+#if PARANOIA > 2
+static char * guard_set_p;
+static char * guard_set_q;
+
+#define guard_set(guard, P, request, sz) do {			  \
+  assert((sz-request) >= GUARD_SIZE);                             \
+  guard_set_p = (char*)(chunk(P));                                \
+  guard_set_p += request;                                         \
+  guard_set_q = (char*)(guard);                                   \
+  *guard_set_p = *guard_set_q; ++guard_set_p; ++guard_set_q;      \
+  *guard_set_p = *guard_set_q; ++guard_set_p; ++guard_set_q;      \
+  *guard_set_p = *guard_set_q; ++guard_set_p; ++guard_set_q;      \
+  *guard_set_p = *guard_set_q;                                    \
+  (P)->req = request;                                             \
+} while(0)
+ 
+#define guard_check(guard, P)				          \
+  assert(0 == memcmp((((char *)chunk(P))+(P)->req),(void*)(guard),GUARD_SIZE));
+
+#else
+#define guard_set(guard, P, request, sz) ((void)0)
+#define guard_check(guard, P) ((void)0)
+#endif /* PARANOIA > 2 */
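+
+/* Sketch of the guard layout (PARANOIA > 2): the padding added by
+ * request2size() ends with GUARD_SIZE canary bytes, copied from the
+ * caller-supplied guard (in practice av->guard_stored, filled with
+ * random bytes) directly behind the user data at chunk(P) + request;
+ * guard_check() later detects small overruns by comparing them
+ * against the stored canary.
+ */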
+
+/* dnmalloc forward declarations */
+static char * dnmalloc_arc4random(void);
+static void dnmalloc_init (void);
+static void malloc_mmap_state(void);
+static void cireg_extend (void);
+static chunkinfoptr cireg_getfree (void);
+static void hashtable_add (chunkinfoptr ci);
+static void hashtable_insert (chunkinfoptr ci_orig, chunkinfoptr ci_insert);
+static void hashtable_remove (mchunkptr p);              
+static void hashtable_skiprm (chunkinfoptr ci_orig, chunkinfoptr ci_todelete);
+static chunkinfoptr hashtable_lookup (mchunkptr p);
+static chunkinfoptr next_chunkinfo (chunkinfoptr ci);          
+static chunkinfoptr prev_chunkinfo (chunkinfoptr ci);
+
+
+
+/*
+  --------------- Physical chunk operations ---------------
+*/
+
+
+/* size field is or'ed with PREV_INUSE when previous adjacent chunk in use */
+#define PREV_INUSE 0x1
+
+/* extract inuse bit of previous chunk */
+#define prev_inuse(p)       ((p)->size & PREV_INUSE)
+
+/* size field is or'ed with IS_MMAPPED if the chunk was obtained with mmap() */
+#define IS_MMAPPED 0x2
+
+/* check for mmap()'ed chunk */
+#define chunk_is_mmapped(p) ((p)->size & IS_MMAPPED)
+
+
+/* size field is or'ed with INUSE when the chunk is in use */
+#define INUSE 0x4
+
+/* extract inuse bit of chunk */
+#define inuse(p)       ((p)->size & INUSE)
+
+/* 
+  Bits to mask off when extracting size 
+
+  Note: IS_MMAPPED is intentionally not masked off from size field in
+  macros for which mmapped chunks should never be seen. This should
+  cause helpful core dumps to occur if it is tried by accident by
+  people extending or adapting this malloc.
+*/
+#define SIZE_BITS (PREV_INUSE|IS_MMAPPED|INUSE)
+
+/* Bits to mask off when extracting size of chunks for macros which do not use mmap */
+#define SIZE_NOMMAP (PREV_INUSE|INUSE)
+
+/* Get size, ignoring use bits */
+#define chunksize(p)         ((p)->size & ~(SIZE_BITS))
+
+/* Ptr to the next physical malloc_chunk. */
+#define next_chunk(p) ((mchunkptr)( ((char*)(p)) + ((p)->size & SIZE_NOMMAP) ))
+
+/* Treat space at ptr + offset as a chunk */
+#define chunk_at_offset(p, s)  ((mchunkptr)(((char*)(p)) + (s)))
+
+/* set/clear chunk as being inuse without otherwise disturbing */
+#define set_inuse(p) ((p)->size |= INUSE)
+
+#define clear_inuse(p) ((p)->size &= ~(INUSE))
+
+#define set_previnuse(p) ((p)->size |= PREV_INUSE)
+
+#define clear_previnuse(p) ((p)->size &= ~(PREV_INUSE))
+
+static void set_previnuse_next (chunkinfoptr p)
+{
+   chunkinfoptr q;
+   q = next_chunkinfo (p);
+   if (q)
+      set_previnuse (q);
+}
+
+#define set_all_inuse(p) do { \
+  set_inuse(p);               \
+  set_previnuse_next(p);      \
+} while(0)
+
+
+/* Set size at head, without disturbing its use bit */
+#define set_head_size(p, s)  ((p)->size = (((p)->size & SIZE_NOMMAP) | (s)))
+
+/* Set size/use field */
+#define set_head(p, s)       ((p)->size = (s))
+
+/*
+  Bins
+
+    An array of bin headers for free chunks. Each bin is doubly
+    linked.  The bins are approximately proportionally (log) spaced.
+    There are a lot of these bins (96 in this malloc). This may look excessive, but
+    works very well in practice.  Most bins hold sizes that are
+    unusual as malloc request sizes, but are more usual for fragments
+    and consolidated sets of chunks, which is what these bins hold, so
+    they can be found quickly.  All procedures maintain the invariant
+    that no consolidated chunk physically borders another one, so each
+    chunk in a list is known to be preceded and followed by either
+    inuse chunks or the ends of memory.
+
+    Chunks in bins are kept in size order, with ties going to the
+    approximately least recently used chunk. Ordering isn't needed
+    for the small bins, which all contain the same-sized chunks, but
+    facilitates best-fit allocation for larger chunks. These lists
+    are just sequential. Keeping them in order almost never requires
+    enough traversal to warrant using fancier ordered data
+    structures.  
+
+    Chunks of the same size are linked with the most
+    recently freed at the front, and allocations are taken from the
+    back.  This results in LRU (FIFO) allocation order, which tends
+    to give each chunk an equal opportunity to be consolidated with
+    adjacent freed chunks, resulting in larger free chunks and less
+    fragmentation.
+
+    To simplify use in doubly-linked lists, each bin header acts
+    as a chunkinfo. This avoids special-casing for headers. Unlike
+    dlmalloc, this malloc keeps full struct chunkinfo bin headers
+    (see malloc_state below), so no repositioning tricks are needed
+    to treat them as the fields of a chunkinfo*.
+*/
+
+typedef struct chunkinfo* mbinptr;
+
+/* addressing -- note that bin_at(0) does not exist */
+#define bin_at(m, i) (&(m)->bins[i])
+
+/* analog of ++bin */
+#define next_bin(b)  (b+1)
+
+/* Reminders about list directionality within bins */
+#define first(b)     ((b)->fd)
+#define last(b)      ((b)->bk)
+
+/* Take a chunk off a bin list */
+#define unlink(P, BK, FD) {                                            \
+  FD = P->fd;                                                          \
+  BK = P->bk;                                                          \
+  FD->bk = BK;                                                         \
+  BK->fd = FD;                                                         \
+}
+
+/*
+  Indexing
+
+    Bins for sizes < 256 bytes (MIN_LARGE_SIZE) contain chunks of all
+    the same size, spaced 8 bytes apart. Larger bins are approximately
+    logarithmically spaced:
+
+    32 small bins of width 8 bytes, followed by
+    large bins with 4 bins per power of two (see largebin_index), and
+     1 final bin for whatever is left.
+
+    In practice the large bins rarely see very big chunks, because we
+    expect to service large requests via mmap.
+*/
+
+#define NBINS              96
+#define NSMALLBINS         32
+#define SMALLBIN_WIDTH      8
+#define MIN_LARGE_SIZE    256
+
+#define in_smallbin_range(sz)  \
+  ((CHUNK_SIZE_T)(sz) < (CHUNK_SIZE_T)MIN_LARGE_SIZE)
+
+#define smallbin_index(sz)     (((unsigned)(sz)) >> 3)
+
+/*
+  Compute index for size. We expect this to be inlined when
+  compiled with optimization, else not, which works out well.
+*/
+static int largebin_index(size_t sz) {
+
+  unsigned long  xx = sz >> SMALLBIN_WIDTH; 
+
+  if (xx < 0x10000) 
+    {
+      unsigned int  m;           /* bit position of highest set bit of xx */
+      
+      /* On intel, use BSRL instruction to find highest bit */
+#if defined(__GNUC__) && (defined(i386) || defined(__i386__)) && !defined(USE_UNO)
+
+      unsigned int  x = (unsigned int) xx;
+
+      __asm__("bsrl %1,%0\n\t"
+	      : "=r" (m) 
+	      : "rm"  (x));
+
+#elif defined(__GNUC__) && (defined(x86_64) || defined(__x86_64__)) && !defined(USE_UNO)
+
+      /* bsrq needs a 64bit destination register */
+      unsigned long mq;
+      __asm__("bsrq %1,%0\n\t"
+              : "=r" (mq)
+              : "rm"  (xx));
+      m = (unsigned int) mq;
+
+#else
+
+      /* Taken from Bit Twiddling Hacks
+       * http://graphics.stanford.edu/~seander/bithacks.html
+       * public domain
+       */
+      unsigned int  v  = (unsigned int) xx;
+      register unsigned int shift;
+      
+      m =     (v > 0xFFFF) << 4; v >>= m;
+      shift = (v > 0xFF  ) << 3; v >>= shift; m |= shift;
+      shift = (v > 0xF   ) << 2; v >>= shift; m |= shift;
+      shift = (v > 0x3   ) << 1; v >>= shift; m |= shift;
+      m |= (v >> 1);
+      
+#endif
+      
+      /* Use next 2 bits to create finer-granularity bins */
+      return NSMALLBINS + (m << 2) + ((sz >> (m + 6)) & 3);
+    }
+  else
+    {
+      return NBINS-1;
+    }
+}
+
+#define bin_index(sz) \
+ ((in_smallbin_range(sz)) ? smallbin_index(sz) : largebin_index(sz))
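+
+/* Worked examples for bin_index: a chunk size of 64 is in smallbin
+ * range, so bin_index(64) = smallbin_index(64) = 64 >> 3 = 8. A chunk
+ * size of 4096 is large: xx = 4096 >> 8 = 16, whose highest set bit
+ * is at position m = 4, so largebin_index returns
+ * 32 + (4 << 2) + ((4096 >> 10) & 3) = 48.
+ */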
+
+/*
+  FIRST_SORTED_BIN_SIZE is the chunk size corresponding to the
+  first bin that is maintained in sorted order. This must
+  be the smallest size corresponding to a given bin.
+
+  Normally, this should be MIN_LARGE_SIZE. But you can weaken
+  best fit guarantees to sometimes speed up malloc by increasing this value.
+  Doing this means that malloc may choose a chunk that is 
+  non-best-fitting by up to the width of the bin.
+
+  Some useful cutoff values:
+      512 - all bins sorted
+     2560 - leaves bins <=     64 bytes wide unsorted  
+    12288 - leaves bins <=    512 bytes wide unsorted
+    65536 - leaves bins <=   4096 bytes wide unsorted
+   262144 - leaves bins <=  32768 bytes wide unsorted
+       -1 - no bins sorted (not recommended!)
+*/
+
+/* #define FIRST_SORTED_BIN_SIZE 65536 */
+
+/*          12288 1m59 1m58 1m58
+ *           2560 1m56 1m59 1m57
+ * MIN_LARGE_SIZE 2m01 1m56 1m57
+ */
+#ifdef SAMHAIN
+#define FIRST_SORTED_BIN_SIZE 2560
+#else
+#define FIRST_SORTED_BIN_SIZE MIN_LARGE_SIZE
+#endif
+
+/*
+  Unsorted chunks
+
+    All remainders from chunk splits, as well as all returned chunks,
+    are first placed in the "unsorted" bin. They are then placed
+    in regular bins after malloc gives them ONE chance to be used before
+    binning. So, basically, the unsorted_chunks list acts as a queue,
+    with chunks being placed on it in free (and malloc_consolidate),
+    and taken off (to be either used or placed in bins) in malloc.
+*/
+
+/* The otherwise unindexable 1-bin is used to hold unsorted chunks. */
+#define unsorted_chunks(M)          (bin_at(M, 1))
+
+/*
+  Top
+
+    The top-most available chunk (i.e., the one bordering the end of
+    available memory) is treated specially. It is never included in
+    any bin, is used only if no other chunk is available, and is
+    released back to the system if it is very large (see
+    M_TRIM_THRESHOLD).  Because top initially
+    points to its own bin with initial zero size, thus forcing
+    extension on the first malloc request, we avoid having any special
+    code in malloc to check whether it even exists yet. But we still
+    need to do so when getting memory from system, so we make
+    initial_top treat the bin as a legal but unusable chunk during the
+    interval between initialization and the first call to
+    sYSMALLOc. (This is somewhat delicate, since it relies on
+    the 2 preceding words to be zero during this interval as well.)
+*/
+
+/* Conveniently, the unsorted bin can be used as dummy top on first call */
+#define initial_top(M)              (unsorted_chunks(M))
+
+/*
+  Binmap
+
+    To help compensate for the large number of bins, a one-level index
+    structure is used for bin-by-bin searching.  `binmap' is a
+    bitvector recording whether bins are definitely empty so they can
+    be skipped over during traversals.  The bits are NOT always
+    cleared as soon as bins are empty, but instead only
+    when they are noticed to be empty during traversal in malloc.
+*/
+
+/* Conservatively use 32 bits per map word, even if on 64bit system */
+#define BINMAPSHIFT      5
+#define BITSPERMAP       (1U << BINMAPSHIFT)
+#define BINMAPSIZE       (NBINS / BITSPERMAP)
+
+#define idx2block(i)     ((i) >> BINMAPSHIFT)
+#define idx2bit(i)       ((1U << ((i) & ((1U << BINMAPSHIFT)-1))))
+
+#define mark_bin(m,i)    ((m)->binmap[idx2block(i)] |=  idx2bit(i))
+#define unmark_bin(m,i)  ((m)->binmap[idx2block(i)] &= ~(idx2bit(i)))
+#define get_binmap(m,i)  ((m)->binmap[idx2block(i)] &   idx2bit(i))
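+
+/* Example: bin 70 lives in binmap word idx2block(70) = 70 >> 5 = 2,
+ * at bit position 70 & 31 = 6, i.e. idx2bit(70) = 1U << 6.
+ */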
+
+/*
+  Fastbins
+
+    An array of lists holding recently freed small chunks.  Fastbins
+    are not doubly linked.  It is faster to single-link them, and
+    since chunks are never removed from the middles of these lists,
+    double linking is not necessary. Also, unlike regular bins, they
+    are not even processed in FIFO order (they use faster LIFO) since
+    ordering doesn't much matter in the transient contexts in which
+    fastbins are normally used.
+
+    Chunks in fastbins keep their inuse bit set, so they cannot
+    be consolidated with other free chunks. malloc_consolidate
+    releases all chunks in fastbins and consolidates them with
+    other free chunks. 
+*/
+
+typedef struct chunkinfo* mfastbinptr;
+
+/* offset 2 to use otherwise unindexable first 2 bins */
+#define fastbin_index(sz)        ((((unsigned int)(sz)) >> 3) - 2)
+
+/* The maximum fastbin request size we support */
+#define MAX_FAST_SIZE     80
+
+#define NFASTBINS  (fastbin_index(request2size(MAX_FAST_SIZE))+1)
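+
+/* Worked example, assuming 8-byte alignment and GUARD_SIZE == 4: a
+ * 16-byte request pads to request2size(16) = 24, and
+ * fastbin_index(24) = (24 >> 3) - 2 = 1. MAX_FAST_SIZE pads to
+ * request2size(80) = 88, so NFASTBINS = (88 >> 3) - 2 + 1 = 10.
+ */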
+
+/*
+  FASTBIN_CONSOLIDATION_THRESHOLD is the size of a chunk in free()
+  that triggers automatic consolidation of possibly-surrounding
+  fastbin chunks. This is a heuristic, so the exact value should not
+  matter too much. It is defined at half the default trim threshold as a
+  compromise heuristic to only attempt consolidation if it is likely
+  to lead to trimming. However, it is not dynamically tunable, since
+  consolidation reduces fragmentation surrounding large chunks even 
+  if trimming is not used.
+*/
+
+#define FASTBIN_CONSOLIDATION_THRESHOLD  \
+  ((unsigned long)(DEFAULT_TRIM_THRESHOLD) >> 1)
+
+/*
+  Since the lowest 2 bits in max_fast don't matter in size comparisons, 
+  they are used as flags.
+*/
+
+/*
+  ANYCHUNKS_BIT held in max_fast indicates that there may be any
+  freed chunks at all. It is set true when entering a chunk into any
+  bin.
+*/
+
+#define ANYCHUNKS_BIT        (1U)
+
+#define have_anychunks(M)     (((M)->max_fast &  ANYCHUNKS_BIT))
+#define set_anychunks(M)      ((M)->max_fast |=  ANYCHUNKS_BIT)
+#define clear_anychunks(M)    ((M)->max_fast &= ~ANYCHUNKS_BIT)
+
+/*
+  FASTCHUNKS_BIT held in max_fast indicates that there are probably
+  some fastbin chunks. It is set true on entering a chunk into any
+  fastbin, and cleared only in malloc_consolidate.
+*/
+
+#define FASTCHUNKS_BIT        (2U)
+
+#define have_fastchunks(M)   (((M)->max_fast &  FASTCHUNKS_BIT))
+#define set_fastchunks(M)    ((M)->max_fast |=  (FASTCHUNKS_BIT|ANYCHUNKS_BIT))
+#define clear_fastchunks(M)  ((M)->max_fast &= ~(FASTCHUNKS_BIT))
+
+/* 
+   Set value of max_fast. 
+   Use impossibly small value if 0.
+*/
+
+#define set_max_fast(M, s) \
+  (M)->max_fast = (((s) == 0)? SMALLBIN_WIDTH: request2size(s)) | \
+  ((M)->max_fast &  (FASTCHUNKS_BIT|ANYCHUNKS_BIT))
+
+#define get_max_fast(M) \
+  ((M)->max_fast & ~(FASTCHUNKS_BIT | ANYCHUNKS_BIT))
+
+
+/*
+  morecore_properties is a status word holding dynamically discovered
+  or controlled properties of the morecore function
+*/
+
+#define MORECORE_CONTIGUOUS_BIT  (1U)
+
+#define contiguous(M) \
+        (((M)->morecore_properties &  MORECORE_CONTIGUOUS_BIT))
+#define noncontiguous(M) \
+        (((M)->morecore_properties &  MORECORE_CONTIGUOUS_BIT) == 0)
+#define set_contiguous(M) \
+        ((M)->morecore_properties |=  MORECORE_CONTIGUOUS_BIT)
+#define set_noncontiguous(M) \
+        ((M)->morecore_properties &= ~MORECORE_CONTIGUOUS_BIT)
+
+#define MORECORE_32BIT_BIT  (2U)
+
+#define morecore32bit(M) \
+        (((M)->morecore_properties &  MORECORE_32BIT_BIT))
+#define nonmorecore32bit(M) \
+        (((M)->morecore_properties &  MORECORE_32BIT_BIT) == 0)
+#define set_morecore32bit(M) \
+        ((M)->morecore_properties |=  MORECORE_32BIT_BIT)
+#define set_nonmorecore32bit(M) \
+        ((M)->morecore_properties &= ~MORECORE_32BIT_BIT)
+
+
+
+/* ----------------- dnmalloc -------------------- */
+
+/* size of pages */
+#define PGSIZE malloc_getpagesize
+/* pointer size */
+#define PTRSIZE sizeof(long)
+
+
+
+/* TODO: mmapped chunks are always multiples of pagesize -> we're wasting 
+   address space: the hashtable has granularity of 16*8, set it to something 
+   closer to pagesize for mmapped chunks (current waste: 32 positions/mmapped 
+   page) 
+*/
+
+/* The maximum heap size that dnmalloc can operate with
+ * represented in hex to avoid annoying gcc warning
+ *
+ * Avoid integer overflow, cover complete 32bit address 
+ * space for portability. With deferred allocation, the
+ * hashtable size is a non-issue.
+ */
+#define HEAPMAXSIZE_HALF 0x80000000UL
+
+/* How many elements are stored in the linked list */
+#define LINKEDLSTELS 8
+
+/* Minimum size of a chunk */
+#define MINCHUNKSIZE 16
+
+/* The number of hashtable entries for each page:
+pagesize divided by the number of elements in the linked lists,
+divided by the minimum chunk size
+*/
+#define CHUNKINFOPAGE (PGSIZE / LINKEDLSTELS / MINCHUNKSIZE)
+
+/* The number of hashtable entries needed to manage the memory:
+maximum heap size divided by page size, multiplied by the number
+of chunkinfos per page
+*/
+#define AMOUNTHASH ((HEAPMAXSIZE_HALF / PGSIZE) * CHUNKINFOPAGE * 2)
+
+/* Initial size of the map for the hashtable:
+number of entries multiplied by pointer size
+*/
+#define  HASHTABLESIZE (AMOUNTHASH * PTRSIZE)
+
+/* Number of free chunkinfos that the system should allocate at the start */
+#define NUMBER_FREE_CHUNKS 32768
+
+/* Initial size of the chunk info region, 
+also used when growing the region */
+#define CIREGSIZE (NUMBER_FREE_CHUNKS * sizeof(struct chunkinfo))
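+
+/* Worked sizing, assuming PGSIZE == 4096 and 8-byte pointers:
+ * CHUNKINFOPAGE = 4096/8/16 = 32, AMOUNTHASH = (2^31/4096) * 32 * 2
+ * = 2^25 entries, so HASHTABLESIZE = 2^25 * 8 = 256 MB of *address
+ * space*. This is only reserved, not committed, thanks to deferred
+ * allocation resp. MAP_NORESERVE. CIREGSIZE is
+ * 32768 * sizeof(struct chunkinfo), roughly 1.75 MB per region on a
+ * typical 64-bit build.
+ */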
+
+/* Start address of the heap */
+char *startheap;
+
+/* pointer to the hashtable: struct chunkinfo **hashtable -> *hashtable[] */
+chunkinfoptr *hashtable;
+
+/* Current chunkinfo region */
+struct cireginfo *currciinfo = 0;
+struct cireginfo *firstciinfo = 0;
+
+unsigned long totalcictr = 0;
+
+
+/* Initialize the area for chunkinfos and the hashtable and protect 
+ * it with non-writable pages 
+ */
+static void
+dnmalloc_init ()
+{
+   void *hashtb;
+   int mprot;
+   int flags = MAP_PRIVATE;
+
+   /* Allocate the malloc_state struct */
+   malloc_mmap_state();
+
+   /* Use MAP_NORESERVE if available (Solaris, HP-UX); most other
+    * systems use deferred allocation anyway.
+    */
+#ifdef MAP_NORESERVE
+   flags |= MAP_NORESERVE;
+#endif
+
+   /* Always start at 0, hashtable covers whole 32bit address space
+    */
+#define STARTHEAP_IS_ZERO
+   startheap = 0;
+
+   /* Map space for the hashtable */
+#if PARANOIA > 1
+   hashtb = MMAP(0, HASHTABLESIZE+(2*PGSIZE), PROT_READ|PROT_WRITE, flags);
+#else
+   hashtb = MMAP(0, HASHTABLESIZE+PGSIZE, PROT_READ|PROT_WRITE, flags);
+#endif
+
+#ifdef NDEBUG
+   if (hashtb == MAP_FAILED) {
+      fprintf (stderr, "Couldn't mmap hashtable: %s\n", strerror (errno));
+      abort ();
+   }
+#else
+   assert(hashtb != MAP_FAILED);
+#endif
+
+   /* Place a non-writable guard page in front of the hashtable */
+   mprot = mprotect(hashtb, (size_t) PGSIZE, PROT_NONE);
+#ifdef NDEBUG
+   if (mprot == -1) {
+     fprintf (stderr, "Couldn't mprotect first non-rw page for hashtable: %s\n",
+	      strerror (errno));
+     abort ();
+   }
+#else
+   assert(mprot != -1);
+#endif
+
+   /* HP-UX: Cannot do arithmetic with pointers to objects of unknown size. */
+   hashtable = (chunkinfoptr *) (((char*)hashtb) + PGSIZE);
+
+   /* Place a non-writable guard page behind the hashtable */
+#if PARANOIA > 1
+   mprot = mprotect((void*)((char*)hashtb+HASHTABLESIZE+PGSIZE), (size_t) PGSIZE, PROT_NONE);
+#ifdef NDEBUG
+   if (mprot == -1) {
+     fprintf (stderr, "Couldn't mprotect last non-rw page for hashtable: %s\n",
+	      strerror (errno));
+     abort ();
+   }
+#else
+   assert(mprot != -1);
+#endif
+#endif
+}
+
+
+
+/* Extend the region for chunk infos by mapping more memory before the region */
+static void
+cireg_extend ()
+{
+   void *newcireg;
+   int mprot;
+   struct cireginfo *tempciinfo = 0;
+   
+#if PARANOIA > 1
+   newcireg = MMAP(0, CIREGSIZE+(2*PGSIZE), PROT_READ|PROT_WRITE, MAP_PRIVATE);
+#else
+   newcireg = MMAP(0, CIREGSIZE+PGSIZE, PROT_READ|PROT_WRITE, MAP_PRIVATE);
+#endif
+
+#ifdef NDEBUG
+   if (newcireg == MAP_FAILED)
+   {
+	   fprintf (stderr, "Couldn't extend chunkinfo region: %s\n",
+		    strerror (errno));
+	   abort ();
+   }
+#else
+   assert(newcireg != MAP_FAILED);
+#endif
+   mprot = mprotect(newcireg, PGSIZE, PROT_NONE);
+#ifdef NDEBUG
+   if (mprot == -1) {
+	   fprintf (stderr, "Couldn't mprotect first non-rw page for extended region: %s\n",
+		    strerror (errno));
+	   abort ();
+   }
+#else
+   assert(mprot != -1);
+#endif
+   newcireg = ((char*)newcireg)+PGSIZE;
+   
+#if PARANOIA > 1
+   mprot = mprotect((void*)((char*)newcireg+CIREGSIZE), (size_t) PGSIZE, PROT_NONE);
+#ifdef NDEBUG
+   if (mprot == -1) {
+     fprintf (stderr, "Couldn't mprotect last non-rw page for extended region: %s\n",
+	      strerror (errno));
+     abort ();
+   }
+#else
+   assert(mprot != -1);
+#endif
+#endif
+
+   tempciinfo = currciinfo;
+   currciinfo = (struct cireginfo *) newcireg;
+   if (tempciinfo)
+	   tempciinfo->next = currciinfo;
+   currciinfo->position = 1;
+   currciinfo->freecounter = NUMBER_FREE_CHUNKS;
+   if (!firstciinfo)
+	   firstciinfo = currciinfo;
+   totalcictr++;
+}
+
+
+/* Get a free chunkinfo */
+static chunkinfoptr
+cireg_getfree ()
+{
+   chunkinfoptr freeci;
+   chunkinfoptr freelst = 0;
+   struct cireginfo *newciinfo = firstciinfo;
+   
+   if (newciinfo) {
+   	freelst = newciinfo->freelist;
+    
+	if (!freelst && newciinfo->next) {
+	  do {
+	    newciinfo = newciinfo->next;
+	    freelst = newciinfo->freelist;
+	  } while (!freelst && newciinfo->next);
+	}
+   }
+
+   /* Check if there are any free chunkinfos on the list of free chunkinfos */
+   if (freelst)
+   {
+      freeci = freelst;
+      newciinfo->freelist = freelst->fd;
+      newciinfo->freecounter--;
+      /* memset(freeci, 0, sizeof(struct chunkinfo)); */
+      freeci->prev_size = 0;
+      freeci->size      = 0;
+      freeci->req       = 0;
+      freeci->hash_next = NULL;
+      freeci->fd        = NULL;
+      freeci->bk        = NULL;
+      freeci->chunk     = NULL;
+      return (freeci);
+   }
+   else
+   {
+     /* No free chunkinfos; check if the chunkinfo region still has room
+      * for a chunkinfo. If not, extend the region.
+      */
+     if (UNLIKELY(!currciinfo || currciinfo->position == NUMBER_FREE_CHUNKS))
+       cireg_extend ();
+     /* Get a chunkinfo from the chunkinfo region */
+     freeci = (chunkinfoptr) currciinfo + currciinfo->position; 
+     currciinfo->freecounter--;
+     currciinfo->position++;
+     return (freeci);
+   }
+}
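+
+/* Note: taking chunkinfos at (chunkinfoptr)currciinfo + position
+ * (position starts at 1) appears to rely on
+ * sizeof(struct cireginfo) <= sizeof(struct chunkinfo), so that the
+ * region header fits in the slot that is skipped.
+ */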
+
+static void freeciregion(struct cireginfo *freeme) {
+  /* free the chunkinfo region */
+  struct cireginfo *newciinfo = firstciinfo;
+  struct cireginfo *prevciinfo = firstciinfo;
+  void *unmapme;
+  while (newciinfo && newciinfo != freeme) {
+    prevciinfo = newciinfo;
+    newciinfo = newciinfo->next;
+  }
+  assert(freeme == newciinfo); /* rw */
+  assert(newciinfo != NULL);   /* rw */
+  if (newciinfo)
+    prevciinfo->next = newciinfo->next;
+  unmapme = (void *) ((char*)freeme - PGSIZE);
+#if PARANOIA > 1
+  munmap(unmapme, CIREGSIZE+(2*PGSIZE));
+#else
+  munmap(unmapme, CIREGSIZE+PGSIZE);
+#endif
+}
+
+
+static void freecilst_add(chunkinfoptr p) {
+
+  struct cireginfo *newciinfo;
+  newciinfo = currciinfo;
+  if (((chunkinfoptr) newciinfo < p) && (p  <  ((chunkinfoptr) newciinfo + NUMBER_FREE_CHUNKS))) {
+    p->fd = newciinfo->freelist;
+    newciinfo->freelist = p;
+    newciinfo->freecounter++;
+  } else {
+    newciinfo = firstciinfo;
+    if (newciinfo) {
+      do {
+	if (((chunkinfoptr) newciinfo < p) && (p  <  ((chunkinfoptr) newciinfo + NUMBER_FREE_CHUNKS))) {
+	  p->fd = newciinfo->freelist;
+	  newciinfo->freelist = p;
+	  newciinfo->freecounter++;
+	  if (UNLIKELY(newciinfo->freecounter == NUMBER_FREE_CHUNKS))
+	    freeciregion(newciinfo);
+	  break;
+	}
+	newciinfo = newciinfo->next;
+      } while (newciinfo);
+    }
+  }
+}
+
+/* Calculate the hash table entry for a chunk */
+#ifdef STARTHEAP_IS_ZERO
+#define hash(p)  (((unsigned long)(p)) >> 7)
+#else
+#define hash(p)  (((unsigned long)(p) - (unsigned long) startheap) >> 7)
+#endif
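+
+/* Each hashtable entry thus covers 1 << 7 = 128 bytes of address
+ * space, i.e. LINKEDLSTELS (8) minimum-sized chunks of MINCHUNKSIZE
+ * (16) bytes each; e.g. all chunks at 0x804b000..0x804b07f hash to
+ * the same entry.
+ */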
+
+static void
+hashtable_add (chunkinfoptr ci)
+{
+   chunkinfoptr temp, next;
+   unsigned long hashval;
+   mchunkptr cic = chunk (ci);
+   
+   hashval = hash (cic);
+
+   if (hashval < AMOUNTHASH) {
+
+     temp = hashtable[hashval];
+
+#ifdef DNMALLOC_DEBUG
+     fprintf(stderr, "hashtable_add: %p, %lu\n", chunk(ci), hashval);
+#endif
+
+     /* If no pointer to a chunk info list is stored at this location 
+      * in the hashtable or if the chunk's address is smaller than the 
+      * one present, add the chunk to the front of the linked list
+      */
+     if (temp == 0 || chunk (temp) > cic)
+       {
+	 ci->hash_next = temp;
+	 hashtable[hashval] = ci;
+	 if (!temp) /* more likely case */
+	   goto out;
+	 temp->prev_size = chunksize(ci);
+	 return;
+       }
+     else
+       {
+	 /* We must place the chunk in the linked list for this hash entry.
+	  * Loop to the end of the list, or to a position where temp's
+	  * chunk's address is larger than the new chunkinfo's chunk's address.
+	  */
+	 if (!temp->hash_next || (chunk (temp->hash_next) > cic))
+	   {
+	     ci->hash_next = temp->hash_next;
+	     temp->hash_next = ci;
+	   }
+	 else
+	   {
+	     while ((temp->hash_next != 0) && (chunk (temp->hash_next) < cic))
+	       {
+		 temp = temp->hash_next;
+	       }
+	     /* Place in linked list if not already there */
+	     if (!temp->hash_next || !(chunk (temp->hash_next) == cic))
+	       {
+		 ci->hash_next = temp->hash_next;
+		 temp->hash_next = ci;
+	       }
+	   }
+       }
+   }
+   else {
+#ifdef DNMALLOC_CHECKS
+     if (hashval >= AMOUNTHASH) {
+       fprintf(stderr, "Dnmalloc error: trying to write outside of the bounds of the hashtable, this is definitely a bug, please email dnmalloc@fort-knox.org (hashval: %lu, AMOUNTHASH: %lu, HEAPMAXSIZE_HALF %lu PGSIZE %ld CHUNKINFOPAGE %ld chunk: %p, chunkinfo: %p, startheap: %p).\n", hashval, AMOUNTHASH, HEAPMAXSIZE_HALF, PGSIZE, CHUNKINFOPAGE, chunk(ci), ci, startheap);
+	   abort();
+     }
+#else
+     assert(hashval < AMOUNTHASH);
+#endif
+   }
+
+ out:
+   next = next_chunkinfo(ci);
+   if (!next)
+     return;
+   next->prev_size = chunksize(ci);
+}
+
+static void
+hashtable_insert (chunkinfoptr ci_orig, chunkinfoptr ci_insert)
+{
+   chunkinfoptr next;
+
+#ifdef DNMALLOC_DEBUG
+   fprintf(stderr, "hashtable_ins: %p, %lu\n", chunk(ci_insert), 
+	   (unsigned long)hash(chunk(ci_insert)));
+#endif
+
+   if (hash(chunk(ci_orig)) != hash(chunk(ci_insert))) {
+      hashtable_add(ci_insert);  
+   }
+   else {
+      ci_insert->hash_next = ci_orig->hash_next;
+      ci_orig->hash_next = ci_insert;
+
+      /* added for prevsize */
+      if (!(ci_insert->hash_next))
+	      next = next_chunkinfo(ci_insert);
+      else
+	      next = ci_insert->hash_next;
+
+      if (!next)
+	{
+	  ci_insert->prev_size = chunksize(ci_orig);
+	}
+      else
+	{
+	  next->prev_size = chunksize(ci_insert);
+	  ci_insert->prev_size = chunksize(ci_orig);
+	}
+   }
+}
+
+static void
+hashtable_remove (mchunkptr p) 
+{
+  chunkinfoptr prevtemp, temp;
+  unsigned long hashval;
+  
+  hashval = hash (p);
+#ifdef DNMALLOC_DEBUG
+  fprintf(stderr, "hashtable_rem: %p, %lu\n", p, hashval);
+#endif
+  assert(hashval < AMOUNTHASH); /* rw */
+  prevtemp = temp = hashtable[hashval];
+  if (temp && chunk (temp) == p) {
+    hashtable[hashval] = temp->hash_next;
+  } 
+  else
+    {
+      if (temp && chunk (temp) != p) {
+	do
+	  {
+	    prevtemp = temp;
+	    temp = temp->hash_next;
+	  } while (temp && chunk (temp) != p);
+      }
+#ifdef DNMALLOC_CHECKS
+      if (!temp) {
+	fprintf (stderr,
+		 "Dnmalloc error (hash_rm): could not find a chunkinfo for the chunk %p in the hashtable at entry %lu\n This is definitely a bug, please report it to dnmalloc@fort-knox.org.\n",
+		 p, hashval);
+	abort();
+      }
+#else
+      assert(temp != NULL);
+#endif
+      prevtemp->hash_next = temp->hash_next;
+    }
+}
+
+/* mmapped chunks are multiples of pagesize, no hash_nexts, just remove from the hashtable */
+#define hashtable_remove_mmapped(p) (hashtable[hash(p)] = 0)
+
+static void
+hashtable_skiprm (chunkinfoptr ci_orig, chunkinfoptr ci_todelete)
+{
+   unsigned long hashval;
+   chunkinfoptr next;
+   
+#ifdef DNMALLOC_DEBUG
+   fprintf(stderr, "hashtable_ski: %p, %lu\n", chunk(ci_todelete), hash(chunk(ci_todelete)));
+#endif
+
+   if (ci_orig->hash_next != ci_todelete) {
+     hashval = hash(chunk(ci_todelete));
+     assert(hashval < AMOUNTHASH); /* rw */
+#ifdef DNMALLOC_CHECKS
+     if (hashtable[hashval] != ci_todelete ) {
+	   fprintf(stderr, "Dnmalloc error: trying to delete wrong value (hash: %lu): ci_todelete: %p (%p), hashtable[hashval]: %p (%p)\n This is definitely a bug, please report it to dnmalloc@fort-knox.org.\n", hashval, ci_todelete, chunk(ci_todelete), hashtable[hashval], chunk(hashtable[hashval]));
+     }
+#else
+     assert(hashtable[hashval] == ci_todelete);
+#endif
+     hashtable[hashval] = ci_todelete->hash_next;
+   }
+
+   else {
+     ci_orig->hash_next = ci_todelete->hash_next;
+     if (!ci_orig->hash_next) {
+       next = next_chunkinfo(ci_orig);
+     } else {
+       next = ci_orig->hash_next;
+     }
+     if (next)
+       next->prev_size = chunksize(ci_orig);
+
+   } 
+}
+
+
+static chunkinfoptr
+hashtable_lookup (mchunkptr p)
+{
+   chunkinfoptr ci;
+   unsigned long hashval;
+   
+   /* if we were called wrongly
+    * if ((char *) p < startheap) return 0;
+    */
+   if ((char *) p >= startheap)
+     {
+       hashval = hash (p);
+       assert(hashval < AMOUNTHASH); /* rw */
+       ci = hashtable[hashval];
+       if (ci && chunk (ci) == p)
+	 return ci;
+
+       if (ci) {
+	 do {
+	   ci = ci->hash_next;
+	 } while (ci && chunk (ci) != p);
+       }
+#ifdef DNMALLOC_CHECKS
+       /* This should never occur but if it does, we'd like to know */
+       if (!ci) {
+	 fprintf (stderr,
+		  "Dnmalloc error: could not find a chunkinfo for the chunk %p in the hashtable at entry %lu\n This is definitely a bug, please report it to dnmalloc@fort-knox.org.\n",
+		  p, hashval);
+	 abort();
+       }
+#else
+       assert(ci != NULL);
+#endif
+       return ci;
+     }
+   return 0;
+}
+
+
+
+/*
+   ----------- Internal state representation and initialization -----------
+*/
+
+struct malloc_state {
+
+  /* The maximum chunk size to be eligible for fastbin */
+  INTERNAL_SIZE_T  max_fast;   /* low 2 bits used as flags */
+
+  /* Fastbins */
+  mfastbinptr      fastbins[NFASTBINS];
+
+  /* Base of the topmost chunk -- not otherwise kept in a bin */
+  chunkinfoptr        top;
+
+  /* The remainder from the most recent split of a small request */
+  chunkinfoptr        last_remainder;
+
+  /* Normal bins */
+  struct chunkinfo bins[NBINS];
+
+  /* Bitmap of bins. Trailing zero map handles cases of largest binned size */
+  unsigned int     binmap[BINMAPSIZE+1];
+
+  /* Tunable parameters */
+  CHUNK_SIZE_T     trim_threshold;
+  INTERNAL_SIZE_T  top_pad;
+  INTERNAL_SIZE_T  mmap_threshold;
+
+  /* Memory map support */
+  int              n_mmaps;
+  int              n_mmaps_max;
+  int              max_n_mmaps;
+
+  /* Cache malloc_getpagesize */
+  unsigned int     pagesize;  
+
+  /* Canary */
+  char             guard_stored[GUARD_SIZE];
+
+  /* Track properties of MORECORE */
+  unsigned int     morecore_properties;
+
+  /* Statistics */
+  INTERNAL_SIZE_T  mmapped_mem;
+  INTERNAL_SIZE_T  sbrked_mem;
+  INTERNAL_SIZE_T  max_sbrked_mem;
+  INTERNAL_SIZE_T  max_mmapped_mem;
+  INTERNAL_SIZE_T  max_total_mem;
+};
+
+typedef struct malloc_state *mstate;
+
+/* 
+   There is exactly one instance of this struct in this malloc. Unlike
+   in dlmalloc, it is not a C static: malloc_mmap_state() below maps it
+   between two guard pages and explicitly zero-fills it, preserving
+   the property this malloc relies on -- that malloc_state starts out
+   as all zeroes.
+*/
+
+static struct malloc_state * av_ = NULL;  /* never directly referenced */
+
+/*
+   All uses of av_ are via get_malloc_state().
+   At most one "call" to get_malloc_state is made per invocation of
+   the public versions of malloc and free, but other routines
+   that in turn invoke malloc and/or free may call it more than once. 
+   Also, it is called in check* routines if DEBUG is set.
+*/
+
+#define get_malloc_state() (av_)
+
+/*
+  Map and initialize a malloc_state struct.
+
+  malloc_mmap_state() maps the guarded pages holding the struct;
+  malloc_init_state() further below fills it in. The latter is called
+  only from within malloc_consolidate, which needs to be called in the
+  same contexts anyway.  It is never called directly outside of
+  malloc_consolidate because some optimizing compilers try to inline
+  it at all call points, which turns out not to be an optimization at
+  all. (Inlining it in malloc_consolidate is fine though.)
+*/
+
+#if __STD_C
+static void malloc_mmap_state(void)
+#else
+static void malloc_mmap_state()
+#endif
+{
+  int mprot;
+  unsigned long pagesize = malloc_getpagesize;
+  size_t size = (sizeof(struct malloc_state) + pagesize - 1) & ~(pagesize - 1);
+
+  void * foo = MMAP(0, size+(2*pagesize), PROT_READ|PROT_WRITE, MAP_PRIVATE);
+
+
+#ifdef NDEBUG
+   if (foo == MAP_FAILED) {
+      fprintf (stderr, "Couldn't mmap struct malloc_state: %s\n", strerror (errno));
+      abort ();
+   }
+#else
+   assert(foo != MAP_FAILED);
+#endif
+
+   mprot = mprotect(foo, pagesize, PROT_NONE);
+#ifdef NDEBUG
+   if (mprot == -1) {
+     fprintf (stderr, "Couldn't mprotect first non-rw page for struct malloc_state: %s\n",
+	      strerror (errno));
+     abort ();
+   }
+#else
+   assert(mprot != -1);
+#endif
+
+   av_ = (struct malloc_state *) ((char*)foo + pagesize);
+
+   MALLOC_ZERO(av_, sizeof(struct malloc_state));
+
+   mprot = mprotect((void*)((char*)foo + size + pagesize), (size_t) pagesize, PROT_NONE);
+#ifdef NDEBUG
+   if (mprot == -1) {
+     fprintf (stderr, 
+	      "Couldn't mprotect last non-rw page for struct malloc_state: %s\n",
+	      strerror (errno));
+     abort ();
+   }
+#else
+   assert(mprot != -1);
+#endif
+}
+
+#if __STD_C
+static void malloc_init_state(mstate av)
+#else
+static void malloc_init_state(av) mstate av;
+#endif
+{
+  int     i;
+  mbinptr bin;
+
+  void *  morecore_test = MORECORE(0);
+  unsigned long hashval;
+
+  /* Test morecore function 
+   */
+  set_morecore32bit(av);
+
+  if (morecore_test == MORECORE_FAILURE)
+    {
+      set_nonmorecore32bit(av);
+    }
+  else
+    {
+      /* On 64bit systems, the heap may be located above the
+       * 32bit address space. Since mmap() probably still can be
+       * convinced to map within 32bit, we don't use sbrk().
+       */
+      hashval = hash (morecore_test);
+      if (hashval >= AMOUNTHASH) 
+	{
+	  set_nonmorecore32bit(av);
+	}
+    }
+
+  
+  /* Establish circular links for normal bins */
+  for (i = 1; i < NBINS; ++i) { 
+    bin = bin_at(av,i);
+    bin->fd = bin->bk = bin;
+  }
+
+  av->top_pad        = DEFAULT_TOP_PAD;
+  av->n_mmaps_max    = DEFAULT_MMAP_MAX;
+  av->mmap_threshold = DEFAULT_MMAP_THRESHOLD;
+  av->trim_threshold = DEFAULT_TRIM_THRESHOLD;
+
+#if MORECORE_CONTIGUOUS
+  set_contiguous(av);
+#else
+  set_noncontiguous(av);
+#endif
+
+  set_max_fast(av, DEFAULT_MXFAST);
+
+  av->top = cireg_getfree ();
+  av->top->chunk     = (mchunkptr) startheap;
+  av->top->size      = 0;
+  set_previnuse(av->top);
+  clear_inuse(av->top);
+  hashtable[0]       = av->top;
+  av->pagesize       = malloc_getpagesize;
+
+  memcpy(av->guard_stored, dnmalloc_arc4random(), GUARD_SIZE);
+
+}
+
+/* 
+   Other internal utilities operating on mstates
+*/
+
+#if __STD_C
+static Void_t*  sYSMALLOc(INTERNAL_SIZE_T, mstate);
+static int      sYSTRIm(size_t, mstate);
+static void     malloc_consolidate(mstate);
+#else
+static Void_t*  sYSMALLOc();
+static int      sYSTRIm();
+static void     malloc_consolidate();
+#endif
+
+/* dnmalloc functions */
+/* needs mstate so moved here */
+
+static chunkinfoptr
+next_chunkinfo (chunkinfoptr ci)
+{
+   mchunkptr nextp;
+   unsigned long hashval;
+   chunkinfoptr cinfonextp;
+   mstate av = get_malloc_state();
+   
+   /* If ci is not the last element in its hash chain, we can just
+      return the next chunkinfo from the list (see the else branch
+      below); otherwise we have to compute it from the chunk address.
+   */
+   if (!ci->hash_next)
+     {
+       /* ci is the last element, find the next chunkinfo by 
+	* looking up the chunkinfo for the chunk that is after p's chunk 
+	*/
+       nextp = (mchunkptr) (((char *) (ci->chunk)) + chunksize (ci));
+
+       if (!(nextp == av->top->chunk)) 
+	 {
+	   hashval = hash (nextp);
+	   /* assert(hashval < AMOUNTHASH); *//* major bottleneck */
+	   cinfonextp = hashtable[hashval];
+	   if (cinfonextp && chunk (cinfonextp) == nextp)
+	     return cinfonextp; 
+	   
+#ifdef DNMALLOC_CHECKS_EXTRA
+	   /* This seems bogus; a chunkinfo may legally have no nextp if
+	    * it's the last one allocated (?)
+	    */
+	   else {
+	     if (cinfonextp)
+	       fprintf (stderr,
+			"Dnmalloc error: could not find a next chunkinfo for the chunk %p in the hashtable at entry %lu, cinfonextp: %p, chunk(cinfonextp): %p, nextp: %p\n This is definitely a bug, please report it to dnmalloc@fort-knox.org.\n",
+			chunk(ci), hashval, cinfonextp, chunk(cinfonextp), nextp);
+	     else
+	       fprintf (stderr,
+			"Dnmalloc error: could not find a next chunkinfo for the chunk %p in the hashtable at entry %lu, cinfonextp: %s, chunk(cinfonextp): %s, nextp: %p\n This is definitely a bug, please report it to dnmalloc@fort-knox.org.\n",
+			chunk(ci), hashval, "null", "null", nextp);
+	   }
+#endif
+	  
+	   return NULL;
+	 }
+       else
+	 {
+	   return av->top;
+	 }
+
+     }
+   else
+     {
+       return (ci->hash_next);
+     }
+}
+
+static int is_next_chunk(chunkinfoptr oldp, chunkinfoptr newp) {
+	mchunkptr nextp;
+	if (oldp->hash_next == newp)
+		return 1;
+	nextp = (mchunkptr) (((char *) (oldp->chunk)) + chunksize (oldp));
+	if (nextp == chunk(newp))
+		return 1;
+	return 0;
+}
+
+
+
+/* Get the chunkinfo of the physically previous chunk */
+/* Since we disposed of prev_size, we need this function to find the previous */
+
+static chunkinfoptr
+prev_chunkinfo (chunkinfoptr ci)
+{
+   unsigned int i;
+   chunkinfoptr prev;
+   mchunkptr prevchunk = 0;
+   /* chunkinfoptr temp; */
+   
+   /* Get the hashtable location of the chunkinfo */
+   i = hash (chunk (ci));
+   assert(i < AMOUNTHASH); /* rw */
+      
+   /* Get the first element of the linked list of chunkinfo's that contains p */
+   prev = hashtable[i];
+   
+   if (ci == prev) {
+     prevchunk = (mchunkptr) (((char *) (ci->chunk)) - (ci->prev_size));
+     i = hash(prevchunk);
+     assert(i < AMOUNTHASH); /* rw */
+     /* Loop over the linked list until we reach the last element */
+     for (prev = hashtable[i]; prev->hash_next != 0; prev = prev->hash_next) ;
+   } else {
+     /* p is not the first element in the linked list, we can just 
+	loop over the list and return the previous 
+     */
+     for (prev = hashtable[i]; prev->hash_next != ci; prev = prev->hash_next);
+   }
+
+   return prev;  
+}
+
+
+/*
+  Debugging support
+  Dnmalloc broke dlmalloc's debugging functions; they should be fixed
+  some time in the future. For now, they are defined to do nothing.
+*/
+
+#define check_chunk(P)
+#define check_free_chunk(P)
+#define check_inuse_chunk(P)
+#define check_remalloced_chunk(P,N)
+#define check_malloced_chunk(P,N)
+#define check_malloc_state()
+
+
+/* ----------- Routines dealing with system allocation -------------- */
+
+/*
+  sysmalloc handles malloc cases requiring more memory from the system.
+  On entry, it is assumed that av->top does not have enough
+  space to service request for nb bytes, thus requiring that av->top
+  be extended or replaced.
+*/
+
+#if __STD_C
+static Void_t* sYSMALLOc(INTERNAL_SIZE_T nb, mstate av)
+#else
+static Void_t* sYSMALLOc(nb, av) INTERNAL_SIZE_T nb; mstate av;
+#endif
+{
+  chunkinfoptr    old_top;        /* incoming value of av->top */
+  INTERNAL_SIZE_T old_size;       /* its size */
+  char*           old_end;        /* its end address */
+
+  long            size;           /* arg to first MORECORE or mmap call */
+  char*           brk;            /* return value from MORECORE */
+
+  long            correction;     /* arg to 2nd MORECORE call */
+  char*           snd_brk;        /* 2nd return val */
+
+  INTERNAL_SIZE_T front_misalign; /* unusable bytes at front of new space */
+  INTERNAL_SIZE_T end_misalign;   /* partial page left at end of new space */
+  char*           aligned_brk;    /* aligned offset into brk */
+
+  chunkinfoptr    p;              /* the allocated/returned chunk */
+  chunkinfoptr    remainder;      /* remainder from allocation */
+  chunkinfoptr    fencepost;      /* fencepost */
+  CHUNK_SIZE_T    remainder_size; /* its size */
+
+  CHUNK_SIZE_T    sum;            /* for updating stats */
+
+  size_t          pagemask  = av->pagesize - 1;
+
+#ifdef DNMALLOC_DEBUG
+  fprintf(stderr, "Enter sysmalloc\n");
+#endif
+  /*
+    If there is space available in fastbins, consolidate and retry
+    malloc from scratch rather than getting memory from system.  This
+    can occur only if nb is in smallbin range so we didn't consolidate
+    upon entry to malloc. It is much easier to handle this case here
+    than in malloc proper.
+  */
+
+
+  if (have_fastchunks(av)) {
+    assert(in_smallbin_range(nb));
+    malloc_consolidate(av);
+#ifdef DNMALLOC_DEBUG
+    fprintf(stderr, "Return sysmalloc have_fastchunks\n");
+#endif
+    return mALLOc(nb - MALLOC_ALIGN_MASK);
+  }
+
+
+  /*
+    If have mmap, and the request size meets the mmap threshold, and
+    the system supports mmap, and there are few enough currently
+    allocated mmapped regions, try to directly map this request
+    rather than expanding top.
+  */
+
+  if (UNLIKELY((CHUNK_SIZE_T)(nb) >= (CHUNK_SIZE_T)(av->mmap_threshold) &&
+	       (av->n_mmaps < av->n_mmaps_max))) {
+
+    char* mm;             /* return value from mmap call*/
+
+    /*
+      Round up size to nearest page. The dlmalloc rationale about the
+      extra prev_size overhead does not apply here, since chunk
+      metadata lives out of band; we simply add room for a possible
+      alignment correction at the front.
+    */
+    size = (nb + MALLOC_ALIGN_MASK + pagemask) & ~pagemask;
+
+    /* Don't try if size wraps around 0 */
+    if ((CHUNK_SIZE_T)(size) > (CHUNK_SIZE_T)(nb)) {
+	    
+
+      mm = (char*)(MMAP(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE));
+      
+      if (mm != (char*)(MORECORE_FAILURE)) {
+        
+        /*
+          The offset to the start of the mmapped region is stored
+          in the prev_size field of the chunk. This allows us to adjust
+          returned start address to meet alignment requirements here 
+          and in memalign(), and still be able to compute proper
+          address argument for later munmap in free() and realloc().
+        */
+        
+        front_misalign = (INTERNAL_SIZE_T) mm & MALLOC_ALIGN_MASK;
+	p = cireg_getfree();
+        
+        if (front_misalign > 0) {
+          correction = MALLOC_ALIGNMENT - front_misalign;
+          p->chunk = (mchunkptr)(mm + correction);
+          p->hash_next = (chunkinfoptr) correction;
+          set_head(p, (size - correction) |INUSE|IS_MMAPPED);
+        }
+        else {
+          p->chunk = (mchunkptr)mm;
+          p->hash_next = 0;
+          set_head(p, size|INUSE|IS_MMAPPED);
+        }
+        hashtable_add(p);
+        /* update statistics */
+        
+        if (++av->n_mmaps > av->max_n_mmaps) 
+          av->max_n_mmaps = av->n_mmaps;
+        
+        sum = av->mmapped_mem += size;
+        if (sum > (CHUNK_SIZE_T)(av->max_mmapped_mem)) 
+          av->max_mmapped_mem = sum;
+        sum += av->sbrked_mem;
+        if (sum > (CHUNK_SIZE_T)(av->max_total_mem)) 
+          av->max_total_mem = sum;
+
+        check_chunk(p);
+        
+#ifdef DNMALLOC_DEBUG
+	fprintf(stderr, "Return mmapped (%lu, total %lu)\n", 
+		(unsigned long)size, (unsigned long)av->max_total_mem );
+#endif
+        return chunk(p);
+      }
+    }
+  }
+
+  /* Record incoming configuration of top */
+
+  old_top  = av->top;
+  old_size = chunksize(old_top);
+  old_end  = (char*)(chunk_at_offset(chunk(old_top), old_size));
+
+  brk = snd_brk = (char*)(MORECORE_FAILURE); 
+
+  /* 
+     If not the first time through, we require old_size to be
+     at least MINSIZE and to have prev_inuse set.
+  */
+
+  /* assert((old_top == initial_top(av) && old_size == 0) || 
+	 ((CHUNK_SIZE_T) (old_size) >= MINSIZE &&
+	 prev_inuse(old_top))); */
+
+  /* Precondition: not enough current space to satisfy nb request */
+  assert((CHUNK_SIZE_T)(old_size) < (CHUNK_SIZE_T)(nb + MINSIZE));
+
+  /* Precondition: all fastbins are consolidated */
+  assert(!have_fastchunks(av));
+
+  /* Request enough space for nb + pad + overhead */
+  size = nb + av->top_pad + MINSIZE;
+
+  /*
+    If contiguous, we can subtract out existing space that we hope to
+    combine with new space. We add it back later only if
+    we don't actually get contiguous space.
+  */
+  if (contiguous(av))
+    size -= old_size;
+
+  /*
+    Round to a multiple of page size.
+    If MORECORE is not contiguous, this ensures that we only call it
+    with whole-page arguments.  And if MORECORE is contiguous and
+    this is not first time through, this preserves page-alignment of
+    previous calls. Otherwise, we correct to page-align below.
+  */
+
+  size = (size + pagemask) & ~pagemask;
+
+  /*
+    Don't try to call MORECORE if argument is so big as to appear
+    negative. Note that since mmap takes size_t arg, it may succeed
+    below even if we cannot call MORECORE.
+  */
+  if (size > 0 && morecore32bit(av)) 
+    brk = (char*)(MORECORE(size));
+
+  /*
+    If MORECORE succeeded, simply record the new space.  Otherwise, if
+    we have mmap, try using it as a backup. This is worth doing on
+    systems that have "holes" in address space, so sbrk cannot extend
+    to give contiguous space, but space is available elsewhere.  Note
+    that we ignore mmap max count and threshold limits, since the
+    space will not be used as a segregated mmap region.
+  */
+  if (brk != (char*)(MORECORE_FAILURE)) {
+    av->sbrked_mem += size;
+  }
+
+  else {
+
+#ifdef DNMALLOC_DEBUG
+    fprintf(stderr, "Morecore failure in sysmalloc\n");
+#endif
+
+    /* Cannot merge with old top, so add its size back in */
+    if (contiguous(av))
+      size = (size + old_size + pagemask) & ~pagemask;
+
+    /* If we are relying on mmap as backup, then use larger units */
+    if ((CHUNK_SIZE_T)(size) < (CHUNK_SIZE_T)(MMAP_AS_MORECORE_SIZE))
+      size = MMAP_AS_MORECORE_SIZE;
+
+    /* Don't try if size wraps around 0 */
+    if ((CHUNK_SIZE_T)(size) > (CHUNK_SIZE_T)(nb)) {
+
+#ifdef DNMALLOC_DEBUG
+      fprintf(stderr, "Try mmap in sysmalloc\n");
+#endif
+      brk = (char*)(MMAP(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE));
+      
+      if (brk != (char*)(MORECORE_FAILURE)) {
+        
+	av->mmapped_mem += size;
+#ifdef DNMALLOC_DEBUG
+	fprintf(stderr, "Mmapped successfully in sysmalloc %p\n", brk);
+#endif
+
+        /* We do not need, and cannot use, another sbrk call to find end */
+        snd_brk = brk + size;
+        
+        /* 
+           Record that we no longer have a contiguous sbrk region. 
+           After the first time mmap is used as backup, we do not
+           ever rely on contiguous space since this could incorrectly
+           bridge regions.
+        */
+        set_noncontiguous(av);
+      }
+    }
+  }
+
+  if (brk != (char*)(MORECORE_FAILURE)) {
+#ifdef DNMALLOC_DEBUG
+    fprintf(stderr, "Success path %lu allocated, sbrked %lu\n", 
+	    (unsigned long)size, (unsigned long)av->sbrked_mem);
+#endif
+    /* av->sbrked_mem += size; moved up */
+
+    /*
+      If MORECORE extends previous space, we can likewise extend top size.
+    */
+    
+    if (brk == old_end && snd_brk == (char*)(MORECORE_FAILURE)) {
+      set_head(old_top, (size + old_size) | PREV_INUSE);
+#ifdef DNMALLOC_DEBUG
+      fprintf(stderr, "Previous space extended\n");
+#endif
+    }
+
+    /*
+      Otherwise, make adjustments:
+      
+      * If the first time through or noncontiguous, we need to call sbrk
+        just to find out where the end of memory lies.
+
+      * We need to ensure that all returned chunks from malloc will meet
+        MALLOC_ALIGNMENT
+
+      * If there was an intervening foreign sbrk, we need to adjust the
+        sbrk request size to account for the fact that we will not be
+        able to combine new space with existing space in old_top.
+
+      * Almost all systems internally allocate whole pages at a time, in
+        which case we might as well use the whole last page of request.
+        So we allocate enough more memory to hit a page boundary now,
+        which in turn causes future contiguous calls to page-align.
+    */
+    
+    else {
+      front_misalign = 0;
+      end_misalign = 0;
+      correction = 0;
+      aligned_brk = brk;
+
+      /*
+        If MORECORE returns an address lower than we have seen before,
+        we know it isn't really contiguous.  This and some subsequent
+        checks help cope with non-conforming MORECORE functions and
+        the presence of "foreign" calls to MORECORE from outside of
+        malloc or by other threads.  We cannot guarantee to detect
+        these in all cases, but cope with the ones we do detect.
+      */
+      if (contiguous(av) && old_size != 0 && brk < old_end) {
+        set_noncontiguous(av);
+      }
+      
+      /* handle contiguous cases */
+      if (contiguous(av)) { 
+
+#ifdef DNMALLOC_DEBUG
+	fprintf(stderr, "Handle contiguous cases\n");
+#endif
+        /* 
+           We can tolerate forward non-contiguities here (usually due
+           to foreign calls) but treat them as part of our space for
+           stats reporting.
+        */
+        if (old_size != 0) 
+          av->sbrked_mem += brk - old_end;
+        
+        /* Guarantee alignment of first new chunk made from this space */
+
+        front_misalign = (INTERNAL_SIZE_T) brk & MALLOC_ALIGN_MASK;
+        if (front_misalign > 0) {
+
+          /*
+            Skip over some bytes to arrive at an aligned position.
+            We don't need to specially mark these wasted front bytes.
+            They will never be accessed anyway because
+            prev_inuse of av->top (and any chunk created from its start)
+            is always true after initialization.
+          */
+
+          correction = MALLOC_ALIGNMENT - front_misalign;
+          aligned_brk += correction;
+        }
+        
+        /*
+          If this isn't adjacent to existing space, then we will not
+          be able to merge with old_top space, so must add to 2nd request.
+        */
+        
+        correction += old_size;
+        
+        /* Extend the end address to hit a page boundary */
+        end_misalign = (INTERNAL_SIZE_T)(brk + size + correction);
+        correction += ((end_misalign + pagemask) & ~pagemask) - end_misalign;
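+        /* e.g. if brk + size + correction ends at ...3010 with 4 KiB
+           pages, another 0xff0 bytes are requested so the break lands
+           on the page boundary ...4000 */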
+        
+        assert(correction >= 0);
+        snd_brk = (char*)(MORECORE(correction));
+        
+        if (snd_brk == (char*)(MORECORE_FAILURE)) {
+          /*
+            If we can't allocate the correction, try to at least find
+            out the current brk.  It might be enough to proceed without
+            failing.
+          */
+          correction = 0;
+          snd_brk = (char*)(MORECORE(0));
+        }
+        else if (snd_brk < brk) {
+          /*
+            If the second call gives noncontiguous space even though
+            it says it won't, the only course of action is to ignore
+            results of second call, and conservatively estimate where
+            the first call left us. Also set noncontiguous, so this
+            won't happen again, leaving at most one hole.
+            
+            Note that this check is intrinsically incomplete.  Because
+            MORECORE is allowed to give more space than we ask for,
+            there is no reliable way to detect a noncontiguity
+            producing a forward gap for the second call.
+          */
+          snd_brk = brk + size;
+          correction = 0;
+          set_noncontiguous(av);
+        }
+
+      }
+      
+      /* handle non-contiguous cases */
+      else { 
+
+#ifdef DNMALLOC_DEBUG
+	fprintf(stderr, "Handle non-contiguous cases\n");
+#endif
+
+        /* MORECORE/mmap must correctly align */
+        assert(aligned_OK(brk));
+        
+        /* Find out current end of memory */
+        if (snd_brk == (char*)(MORECORE_FAILURE)) {
+          snd_brk = (char*)(MORECORE(0));
+          av->sbrked_mem += snd_brk - brk - size;
+        }
+#ifdef DNMALLOC_DEBUG
+	fprintf(stderr, "Sbrked now %lu\n", (unsigned long)av->sbrked_mem);
+#endif
+      }
+      
+      /* Adjust top based on results of second sbrk.
+       *
+       * If mmap() has been used as backup for failed morecore(),
+       * we end up in this branch as well.
+       */
+      if (snd_brk != (char*)(MORECORE_FAILURE)) {
+#ifdef DNMALLOC_DEBUG
+	fprintf(stderr, "Adjust top, correction %lu\n",
+		(unsigned long)correction);
+#endif
+        /* hashtable_remove(chunk(av->top)); *//* rw 19.05.2008 removed */
+	av->top =  cireg_getfree();
+        av->top->chunk = (mchunkptr)aligned_brk;
+        set_head(av->top, (snd_brk - aligned_brk + correction) | PREV_INUSE);
+#ifdef DNMALLOC_DEBUG
+	fprintf(stderr, "Adjust top, top %p size %lu\n", 
+		av->top, (unsigned long)chunksize(av->top));
+#endif
+        hashtable_add(av->top);
+        av->sbrked_mem += correction;
+     
+        /*
+          If not the first time through, we either have a
+          gap due to foreign sbrk or a non-contiguous region.  Insert a
+          fencepost at old_top to prevent consolidation with space
+          we don't own. Fenceposts are artificial chunks that are
+          marked as inuse. The original dlmalloc used two of them, but
+          they were too small to be usable here. To ensure that the
+          linked lists contain a maximum of 8 elements we use only
+          one. Inuse is determined by the current rather than the next
+          chunk anyway.
+        */
+   
+        if (old_size != 0) {
+#ifdef DNMALLOC_DEBUG
+	  fprintf(stderr, "Shrink old_top to insert fenceposts\n");
+#endif
+          /* 
+             Shrink old_top to insert fenceposts, keeping size a
+             multiple of MALLOC_ALIGNMENT. We know there is at least
+             enough space in old_top to do this.
+          */
+#ifdef DNMALLOC_DEBUG
+	  fprintf(stderr, "Adjust top, old_top %p old_size before %lu\n", 
+		  old_top, (unsigned long)old_size);
+#endif
+          old_size = (old_size - 4*SIZE_SZ) & ~MALLOC_ALIGN_MASK;
+          set_head(old_top, old_size | PREV_INUSE);
+#ifdef DNMALLOC_DEBUG
+	  fprintf(stderr, "Adjust top, old_size after %lu\n", 
+		  (unsigned long)old_size);
+#endif
+          
+          /*
+            Note that the following assignments completely overwrite
+            old_top when old_size was previously MINSIZE.  This is
+            intentional. We need the fencepost, even if old_top otherwise gets
+            lost.
+          */
+          /* dnmalloc, we need the fencepost to be 16 bytes, however since 
+	     it's marked inuse it will never be coalesced 
+	  */
+          fencepost = cireg_getfree();
+          fencepost->chunk = (mchunkptr) chunk_at_offset(chunk(old_top), 
+							 old_size);
+          fencepost->size = 16|INUSE|PREV_INUSE;
+          hashtable_add(fencepost);
+          /* 
+             If possible, release the rest, suppressing trimming.
+          */
+          if (old_size >= MINSIZE) {
+            INTERNAL_SIZE_T tt = av->trim_threshold;
+#ifdef DNMALLOC_DEBUG
+	    fprintf(stderr, "Release\n");
+#endif
+            av->trim_threshold = (INTERNAL_SIZE_T)(-1);
+	    set_head(old_top, old_size | PREV_INUSE | INUSE);
+	    guard_set(av->guard_stored, old_top, 0, old_size);
+            fREe(chunk(old_top));
+            av->trim_threshold = tt;
+#ifdef DNMALLOC_DEBUG
+	    fprintf(stderr, "Release done\n");
+#endif
+          }
+
+#ifdef DNMALLOC_DEBUG
+	  fprintf(stderr, "Adjust top, size %lu\n", 
+		  (unsigned long)chunksize(av->top));
+#endif
+
+        } /* fenceposts */
+      } /* adjust top */
+    } /* not extended previous region */
+    
+    /* Update statistics */ /* FIXME check this */
+    sum = av->sbrked_mem;
+    if (sum > (CHUNK_SIZE_T)(av->max_sbrked_mem))
+      av->max_sbrked_mem = sum;
+    
+    sum += av->mmapped_mem;
+    if (sum > (CHUNK_SIZE_T)(av->max_total_mem))
+      av->max_total_mem = sum;
+
+    check_malloc_state();
+    
+    /* finally, do the allocation */
+
+    p = av->top;
+    size = chunksize(p);
+    
+#ifdef DNMALLOC_DEBUG
+    fprintf(stderr, "Size: %lu  nb+MINSIZE: %lu\n", 
+	    (CHUNK_SIZE_T)(size), (CHUNK_SIZE_T)(nb + MINSIZE));
+#endif
+
+    /* check that one of the above allocation paths succeeded */
+    if ((CHUNK_SIZE_T)(size) >= (CHUNK_SIZE_T)(nb + MINSIZE)) {
+      remainder_size = size - nb;
+      remainder = cireg_getfree();
+      remainder->chunk = chunk_at_offset(chunk(p), nb);
+      av->top = remainder;
+      set_head(p, nb | PREV_INUSE | INUSE);
+      set_head(remainder, remainder_size | PREV_INUSE);
+      hashtable_insert (p, av->top);
+      check_malloced_chunk(p, nb);
+#ifdef DNMALLOC_DEBUG
+      fprintf(stderr, "Return any (total %lu)\n", 
+	      (unsigned long)av->max_total_mem );
+#endif
+      return chunk(p);
+    }
+
+  }
+
+#ifdef DNMALLOC_DEBUG
+  fprintf(stderr, "Return failed (total %lu)\n", 
+	  (unsigned long)av->max_total_mem );
+#endif
+
+  /* catch all failure paths */
+  MALLOC_FAILURE_ACTION;
+  return 0;
+}
+
+
+
+
+/*
+  sYSTRIm is an inverse of sorts to sYSMALLOc.  It gives memory back
+  to the system (via negative arguments to sbrk) if there is unused
+  memory at the `high' end of the malloc pool. It is called
+  automatically by free() when top space exceeds the trim
+  threshold. It is also called by the public malloc_trim routine.  It
+  returns 1 if it actually released any memory, else 0.
+*/
+
+#if __STD_C
+static int sYSTRIm(size_t pad, mstate av)
+#else
+static int sYSTRIm(pad, av) size_t pad; mstate av;
+#endif
+{
+  long  top_size;        /* Amount of top-most memory */
+  long  extra;           /* Amount to release */
+  long  released;        /* Amount actually released */
+  char* current_brk;     /* address returned by pre-check sbrk call */
+  char* new_brk;         /* address returned by post-check sbrk call */
+  size_t pagesz;
+
+  pagesz = av->pagesize;
+  top_size = chunksize(av->top);
+  
+  /* Release in pagesize units, keeping at least one page */
+  extra = ((top_size - pad - MINSIZE + (pagesz-1)) / pagesz - 1) * pagesz;
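+  /* extra is the number of whole pages above pad + MINSIZE, minus one
+     page that is always kept; e.g. 1 MiB of top with pad 0 on 4 KiB
+     pages yields roughly 255 releasable pages */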
+  
+  if (extra > 0) {
+    
+    /*
+      Only proceed if end of memory is where we last set it.
+      This avoids problems if there were foreign sbrk calls.
+    */
+    current_brk = (char*)(MORECORE(0));
+    if (current_brk == (char*)(av->top) + top_size) {
+      
+      /*
+        Attempt to release memory. We ignore MORECORE return value,
+        and instead call again to find out where new end of memory is.
+        This avoids problems if the first call releases less than we
+        asked, or if failure somehow altered the brk value. (We could still
+        encounter problems if it altered brk in some very bad way,
+        but the only thing we can do is adjust anyway, which will cause
+        some downstream failure.)
+      */
+      
+      MORECORE(-extra);
+      new_brk = (char*)(MORECORE(0));
+      
+      if (new_brk != (char*)MORECORE_FAILURE) {
+        released = (long)(current_brk - new_brk);
+        
+        if (released != 0) {
+          /* Success. Adjust top. */
+          av->sbrked_mem -= released;
+          set_head(av->top, (top_size - released) | PREV_INUSE);
+          check_malloc_state();
+          return 1;
+        }
+      }
+    }
+  }
+  return 0;
+}
+
+/*
+  ------------------------------ malloc ------------------------------
+*/
+
+
+#if __STD_C
+DL_STATIC Void_t* mALLOc(size_t bytes)
+#else
+DL_STATIC   Void_t* mALLOc(bytes) size_t bytes;
+#endif
+{
+  mstate av = get_malloc_state();
+
+  INTERNAL_SIZE_T nb;               /* normalized request size */
+  unsigned int    idx;              /* associated bin index */
+  mbinptr         bin;              /* associated bin */
+  mfastbinptr*    fb;               /* associated fastbin */
+
+  chunkinfoptr       victim;           /* inspected/selected chunk */
+  INTERNAL_SIZE_T size;             /* its size */
+  int             victim_index;     /* its bin index */
+
+  chunkinfoptr       remainder;        /* remainder from a split */
+  CHUNK_SIZE_T    remainder_size;   /* its size */
+
+  unsigned int    block;            /* bit map traverser */
+  unsigned int    bit;              /* bit map traverser */
+  unsigned int    map;              /* current word of binmap */
+
+  chunkinfoptr       fwd;              /* misc temp for linking */
+  chunkinfoptr       bck;              /* misc temp for linking */
+  
+  Void_t*         retval;
+
+  /* chunkinfoptr	  next; */
+ 
+
+  /*
+    Convert request size to internal form by adding SIZE_SZ bytes
+    overhead plus possibly more to obtain necessary alignment and/or
+    to obtain a size of at least MINSIZE, the smallest allocatable
+    size. Also, checked_request2size traps (returning 0) request sizes
+    that are so large that they wrap around zero when padded and
+    aligned.
+  */
+#if defined(SH_CUTEST)
+  extern int malloc_count;
+  ++malloc_count;
+#endif
+
+  checked_request2size(bytes, nb);
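+  /* e.g. a 1-byte request is padded up to MINSIZE; a request so large
+     that padding would wrap past zero is trapped as described above */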
+
+  /*
+    Bypass search if no frees yet
+   */
+  if (av && have_anychunks(av)) {
+    goto av_initialized;
+  }
+  else {
+    if (!av || av->max_fast == 0) { /* initialization check */
+      malloc_consolidate(av);
+      av = get_malloc_state();
+    }
+    goto use_top;
+  }
+
+ av_initialized:
+
+  /*
+    If the size qualifies as a fastbin, first check corresponding bin.
+  */
+  if ((CHUNK_SIZE_T)(nb) <= (CHUNK_SIZE_T)(av->max_fast)) {
+    fb = &(av->fastbins[(fastbin_index(nb))]);
+    if ( (victim = *fb) != 0) {
+      *fb = victim->fd;
+      check_remalloced_chunk(victim, nb);
+      guard_set(av->guard_stored, victim, bytes, nb);
+      return chunk(victim);
+    }
+  }
+
+  /*
+    If a small request, check regular bin.  Since these "smallbins"
+    hold one size each, no searching within bins is necessary.
+    (For a large request, we need to wait until unsorted chunks are
+    processed to find best fit. But for small ones, fits are exact
+    anyway, so we can check now, which is faster.)
+  */
+
+  if (in_smallbin_range(nb)) {
+    idx = smallbin_index(nb);
+    bin = bin_at(av,idx);
+
+    if ((victim = last(bin)) != bin) {
+      bck = victim->bk;
+      bin->bk = bck;
+      bck->fd = bin;
+
+      set_all_inuse(victim);
+            
+      check_malloced_chunk(victim, nb);
+      guard_set(av->guard_stored, victim, bytes, nb);
+      return chunk(victim);
+    }
+  }
+
+  /* 
+     If this is a large request, consolidate fastbins before continuing.
+     While it might look excessive to kill all fastbins before
+     even seeing if there is space available, this avoids
+     fragmentation problems normally associated with fastbins.
+     Also, in practice, programs tend to have runs of either small or
+     large requests, but less often mixtures, so consolidation is not
+     invoked all that often in most programs. And the programs in
+     which it is invoked frequently tend to fragment anyway.
+  */
+
+  else {
+    idx = largebin_index(nb);
+    if (have_fastchunks(av)) 
+      malloc_consolidate(av);
+  }
+
+  /*
+    Process recently freed or remaindered chunks, taking one only if
+    it is an exact fit, or, if this is a small request, if the chunk
+    is the remainder from the most recent non-exact fit.  Place other
+    traversed chunks in bins.  Note that this step is the only place
+    in any routine where chunks are placed in bins.
+  */
+    
+  while ( (victim = unsorted_chunks(av)->bk) != unsorted_chunks(av)) {
+    bck = victim->bk;
+    size = chunksize(victim);
+    
+    /* 
+       If a small request, try to use last remainder if it is the
+       only chunk in unsorted bin.  This helps promote locality for
+       runs of consecutive small requests. This is the only
+       exception to best-fit, and applies only when there is
+       no exact fit for a small chunk.
+    */
+    
+    if (UNLIKELY(in_smallbin_range(nb) && 
+		 bck == unsorted_chunks(av) &&
+		 victim == av->last_remainder &&
+		 (CHUNK_SIZE_T)(size) > (CHUNK_SIZE_T)(nb + MINSIZE))) {
+      
+      /* split and reattach remainder */
+      remainder_size = size - nb;
+      remainder = cireg_getfree();
+      remainder->chunk = chunk_at_offset(chunk(victim), nb);
+      unsorted_chunks(av)->bk = unsorted_chunks(av)->fd = remainder;
+      av->last_remainder = remainder; 
+      remainder->bk = remainder->fd = unsorted_chunks(av);
+      
+      set_head(victim, nb | PREV_INUSE|INUSE);
+      set_head(remainder, remainder_size | PREV_INUSE);      
+      hashtable_insert(victim, remainder);
+
+      check_malloced_chunk(victim, nb);
+      guard_set(av->guard_stored, victim, bytes, nb);
+      return chunk(victim);
+    }
+    
+    /* remove from unsorted list */
+    unsorted_chunks(av)->bk = bck;
+    bck->fd = unsorted_chunks(av);
+    
+    /* Take now instead of binning if exact fit */
+    
+    if (UNLIKELY(size == nb)) {
+      set_all_inuse(victim);
+      check_malloced_chunk(victim, nb);
+      guard_set(av->guard_stored, victim, bytes, nb);
+      return chunk(victim);
+    }
+    
+    /* place chunk in bin */
+    
+    if (in_smallbin_range(size)) {
+
+      victim_index = smallbin_index(size);
+      bck = bin_at(av, victim_index);
+      fwd = bck->fd;
+    }
+    else {
+      victim_index = largebin_index(size);
+      bck = bin_at(av, victim_index);
+      fwd = bck->fd;
+      
+      if (UNLIKELY(fwd != bck)) {
+        /* if smaller than smallest, place first */
+        if ((CHUNK_SIZE_T)(size) < (CHUNK_SIZE_T)(bck->bk->size)) {
+          fwd = bck;
+          bck = bck->bk;
+        }
+        else if ((CHUNK_SIZE_T)(size) >= 
+                 (CHUNK_SIZE_T)(FIRST_SORTED_BIN_SIZE)) {
+          
+          /* maintain large bins in sorted order */
+          size |= PREV_INUSE|INUSE; /* Or with inuse bits to speed comparisons */
+          while ((CHUNK_SIZE_T)(size) < (CHUNK_SIZE_T)(fwd->size)) 
+            fwd = fwd->fd;
+          bck = fwd->bk;
+        }
+      }
+    }
+
+    mark_bin(av, victim_index);
+    victim->bk = bck;
+    victim->fd = fwd;
+    fwd->bk = victim;
+    bck->fd = victim;
+  }
+  
+  /*
+    If a large request, scan through the chunks of current bin to
+    find one that fits.  (This will be the smallest that fits unless
+    FIRST_SORTED_BIN_SIZE has been changed from default.)  This is
+    the only step where an unbounded number of chunks might be
+    scanned without doing anything useful with them. However the
+    lists tend to be short.
+  */
+
+  if (!in_smallbin_range(nb)) {
+    bin = bin_at(av, idx);
+    
+    victim = last(bin);
+
+    if (UNLIKELY(victim != bin)) {
+
+      do {
+	size = chunksize(victim);
+      
+	if ((CHUNK_SIZE_T)(size) >= (CHUNK_SIZE_T)(nb)) {
+	  remainder_size = size - nb;
+	  unlink(victim, bck, fwd);
+        
+	  /* Split */
+	  if (remainder_size >= MINSIZE) {
+	    remainder = cireg_getfree();
+	    remainder->chunk = chunk_at_offset(chunk(victim), nb);
+	    unsorted_chunks(av)->bk = unsorted_chunks(av)->fd = remainder;
+	    remainder->bk = remainder->fd = unsorted_chunks(av);
+	    set_head(victim, nb | PREV_INUSE | INUSE);
+	    set_head(remainder, remainder_size | PREV_INUSE);
+	    hashtable_insert(victim, remainder);
+	    check_malloced_chunk(victim, nb);
+	    guard_set(av->guard_stored, victim, bytes, nb);
+	    return chunk(victim);
+	  } 
+	  /* Exhaust */
+	  else  {
+	    set_all_inuse(victim);
+	    check_malloced_chunk(victim, nb);
+	    guard_set(av->guard_stored, victim, bytes, nb);
+	    return chunk(victim);
+	  }
+	}
+	victim = victim->bk;
+      } while(victim != bin);
+    }
+  }
+
+  /*
+    Search for a chunk by scanning bins, starting with next largest
+    bin. This search is strictly by best-fit; i.e., the smallest
+    (with ties going to approximately the least recently used) chunk
+    that fits is selected.
+    
+    The bitmap avoids needing to check that most blocks are nonempty.
+  */
+    
+
+  ++idx;
+  bin = bin_at(av,idx);
+  block = idx2block(idx);
+  map = av->binmap[block];
+  bit = idx2bit(idx);
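+  /* the binmap keeps one bit per bin, packed into words; assuming 32
+     bins per map word (BINMAPSHIFT == 5), e.g. bin index 70 lives in
+     block 2 as bit (1 << 6) */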
+  
+  for (;;) {
+    
+    /* Skip rest of block if there are no more set bits in this block.  */
+    if (bit > map || bit == 0) {
+      do {
+        if (++block >= BINMAPSIZE)  /* out of bins */
+          goto use_top;
+      } while ( (map = av->binmap[block]) == 0);
+      
+      bin = bin_at(av, (block << BINMAPSHIFT));
+      bit = 1;
+    }
+    
+    /* Advance to bin with set bit. There must be one. */
+    while ((bit & map) == 0) {
+      bin = next_bin(bin);
+      bit <<= 1;
+      assert(bit != 0);
+    }
+    
+    /* Inspect the bin. It is likely to be non-empty */
+    victim = last(bin);
+    
+    /*  If a false alarm (empty bin), clear the bit. */
+    if (victim == bin) {
+      av->binmap[block] = map &= ~bit; /* Write through */
+      bin = next_bin(bin);
+      bit <<= 1;
+    }
+    
+    else {
+      size = chunksize(victim);
+      
+      /*  We know the first chunk in this bin is big enough to use. */
+      assert((CHUNK_SIZE_T)(size) >= (CHUNK_SIZE_T)(nb));
+      
+      remainder_size = size - nb;
+      
+      /* unlink */
+      bck = victim->bk;
+      bin->bk = bck;
+      bck->fd = bin;
+      
+      /* Split */
+      if (remainder_size >= MINSIZE) {
+        remainder = cireg_getfree();
+        remainder->chunk = chunk_at_offset(chunk(victim), nb);
+        
+        unsorted_chunks(av)->bk = unsorted_chunks(av)->fd = remainder;
+        remainder->bk = remainder->fd = unsorted_chunks(av);
+        /* advertise as last remainder */
+        if (in_smallbin_range(nb)) 
+          av->last_remainder = remainder; 
+        
+        set_head(victim, nb | PREV_INUSE | INUSE);
+        set_head(remainder, remainder_size | PREV_INUSE);
+        hashtable_insert(victim, remainder);
+        check_malloced_chunk(victim, nb);
+	guard_set(av->guard_stored, victim, bytes, nb);
+        return chunk(victim);
+      }
+      /* Exhaust */
+      else {
+        set_all_inuse(victim);
+        check_malloced_chunk(victim, nb);
+	guard_set(av->guard_stored, victim, bytes, nb);
+        return chunk(victim);
+      }
+      
+    }
+  }
+
+  use_top:
+   
+
+  /*
+    If large enough, split off the chunk bordering the end of memory
+    (held in av->top). Note that this is in accord with the best-fit
+    search rule.  In effect, av->top is treated as larger (and thus
+    less well fitting) than any other available chunk since it can
+    be extended to be as large as necessary (up to system
+    limitations).
+    
+    We require that av->top always exists (i.e., has size >=
+    MINSIZE) after initialization, so if it would otherwise be
+    exhausted by the current request, it is replenished. (The main
+    reason for ensuring it exists is that we may need MINSIZE space
+    to put in fenceposts in sysmalloc.)
+  */
+  
+  victim = av->top;
+  size = chunksize(victim);
+  
+  if ((CHUNK_SIZE_T)(size) >= (CHUNK_SIZE_T)(nb + MINSIZE)) {
+    remainder = cireg_getfree();
+    remainder_size = size - nb;
+    remainder->chunk = chunk_at_offset(chunk(victim), nb);
+    av->top = remainder;
+    set_head(victim, nb | PREV_INUSE | INUSE);
+    set_head(remainder, remainder_size | PREV_INUSE);
+    hashtable_insert(victim, remainder);
+    check_malloced_chunk(victim, nb);
+    guard_set(av->guard_stored, victim, bytes, nb);
+    return chunk(victim);
+  }
+  
+  /* 
+     If no space in top, relay to handle system-dependent cases 
+  */
+  retval = sYSMALLOc(nb, av);
+  if (retval) {
+    victim = mem2chunk(retval);
+    guard_set(av->guard_stored, victim, bytes, nb);
+  }
+  return retval;
+}
+
+/*
+  ------------------------------ free ------------------------------
+*/
+
+#if __STD_C
+DL_STATIC void fREe(Void_t* mem)
+#else
+DL_STATIC void fREe(mem) Void_t* mem;
+#endif
+{
+  mstate av = get_malloc_state();
+
+  chunkinfoptr       p;           /* chunk corresponding to mem */
+  INTERNAL_SIZE_T size;        /* its size */
+  mfastbinptr*    fb;          /* associated fastbin */
+  chunkinfoptr       prevchunk;   /* previous physical chunk */
+  chunkinfoptr       nextchunk;   /* next contiguous chunk */
+  INTERNAL_SIZE_T nextsize;    /* its size */
+  INTERNAL_SIZE_T prevsize;    /* size of previous contiguous chunk */
+  chunkinfoptr       bck;         /* misc temp for linking */
+  chunkinfoptr       fwd;         /* misc temp for linking */
+  chunkinfoptr	     next;
+#if defined(SH_CUTEST)
+  extern int malloc_count;
+  --malloc_count;
+#endif
+
+  /* free(0) has no effect */
+  if (mem != 0) {
+    p = hashtable_lookup(mem);
+    /* check that memory is managed by us 
+     * and is inuse 
+     */
+    if (UNLIKELY(!p || !inuse(p))) 
+      {
+#ifdef DNMALLOC_CHECKS
+	if (p) {
+	  fprintf(stderr, "Attempt to free memory not in use\n");
+	  abort();
+	} else {
+	  fprintf(stderr, "Attempt to free memory not allocated\n");
+	  abort();
+	}
+#endif
+	assert(p && inuse(p));
+	return;
+      }
+
+    guard_check(av->guard_stored, p);
+
+    size = chunksize(p);
+
+    check_inuse_chunk(p);
+
+    /*
+      If eligible, place chunk on a fastbin so it can be found
+      and used quickly in malloc.
+    */
+
+    if ((CHUNK_SIZE_T)(size) <= (CHUNK_SIZE_T)(av->max_fast)
+
+#if TRIM_FASTBINS
+        /* 
+           If TRIM_FASTBINS set, don't place chunks
+           bordering top into fastbins
+        */
+        && (chunk_at_offset(chunk(p), size) != av->top)
+#endif
+        ) {
+
+      set_fastchunks(av);
+      fb = &(av->fastbins[fastbin_index(size)]);
+      p->fd = *fb;
+      *fb = p;
+    }
+
+    /*
+       Consolidate other non-mmapped chunks as they arrive.
+    */
+
+    else if (!chunk_is_mmapped(p)) {
+      set_anychunks(av);
+
+      nextchunk = next_chunkinfo(p);
+      if (nextchunk)
+	nextsize = chunksize(nextchunk);
+      else
+	nextsize = 0;/* gcc doesn't notice that it's only used if (nextchunk)*/
+
+      /* consolidate backward */
+      if (UNLIKELY(!prev_inuse(p))) {
+        prevchunk = prev_chunkinfo(p);
+        prevsize = chunksize(prevchunk);
+#ifdef DNMALLOC_CHECKS
+	if (inuse(prevchunk)) {
+		fprintf(stderr, "Dnmalloc error: trying to unlink an inuse chunk: %p (chunk: %p)\n This is definitely a bug, please report it to dnmalloc@fort-knox.org.\n", prevchunk, chunk(prevchunk));
+		abort();
+	}
+#else
+	assert(!inuse(prevchunk));
+#endif
+        size += prevsize;
+        unlink(prevchunk, bck, fwd);
+	set_head(p, size | PREV_INUSE);
+        hashtable_skiprm(prevchunk,p);
+        /* This chunk no longer exists in any form: release the chunkinfoptr 
+	 */
+        freecilst_add(p);
+        p = prevchunk;
+      }
+
+      if (nextchunk) {
+	if (nextchunk != av->top) {
+	  /* get and clear inuse bit */
+	  clear_previnuse(nextchunk);
+	  
+	  /* consolidate forward */
+	  if (!inuse(nextchunk)) {
+	    unlink(nextchunk, bck, fwd);
+	    size += nextsize;
+	    set_head(p, size | PREV_INUSE);
+	    hashtable_skiprm(p, nextchunk);
+	    freecilst_add (nextchunk);
+	  }
+	  
+	  set_head(p, size | PREV_INUSE);
+	  next = next_chunkinfo(p);
+	  if (next)
+	    next->prev_size = size;
+	  
+	  /*
+	    Place the chunk in unsorted chunk list. Chunks are
+	    not placed into regular bins until after they have
+	    been given one chance to be used in malloc.
+	  */
+	  
+	  bck = unsorted_chunks(av);
+	  fwd = bck->fd;
+	  p->bk = bck;
+	  p->fd = fwd;
+	  bck->fd = p;
+	  fwd->bk = p;
+	  
+	  nextchunk = next_chunkinfo(p);
+	  if (nextchunk)
+	    nextchunk->prev_size = chunksize(p);	
+	  
+	  check_free_chunk(p);
+	}
+	
+	/*
+	  If the chunk borders the current high end of memory,
+	  consolidate into top
+	*/
+	
+	else {
+	  size += nextsize;
+	  set_head(p, size | PREV_INUSE);
+	  hashtable_remove(chunk(av->top));
+	  freecilst_add(av->top);
+	  av->top = p;
+	  check_chunk(p);
+	}
+      } /* if (nextchunk) */
+
+      /*
+        If freeing a large space, consolidate possibly-surrounding
+        chunks. Then, if the total unused topmost memory exceeds trim
+        threshold, ask malloc_trim to reduce top.
+
+        Unless max_fast is 0, we don't know if there are fastbins
+        bordering top, so we cannot tell for sure whether threshold
+        has been reached unless fastbins are consolidated.  But we
+        don't want to consolidate on each free.  As a compromise,
+        consolidation is performed if FASTBIN_CONSOLIDATION_THRESHOLD
+        is reached.
+      */
+
+      if (UNLIKELY((CHUNK_SIZE_T)(size) >= FASTBIN_CONSOLIDATION_THRESHOLD)) { 
+        if (have_fastchunks(av)) 
+          malloc_consolidate(av);
+
+#ifndef MORECORE_CANNOT_TRIM        
+        if ((CHUNK_SIZE_T)(chunksize(av->top)) >= 
+            (CHUNK_SIZE_T)(av->trim_threshold))
+	  {
+	    if (morecore32bit(av))
+	      {
+#ifdef DNMALLOC_DEBUG
+		fprintf(stderr, "Calling systrim from free()\n");
+#endif
+		sYSTRIm(av->top_pad, av);
+#ifdef DNMALLOC_DEBUG
+		fprintf(stderr, "Systrim done\n");
+#endif
+	      }
+	  }
+#endif
+      }
+
+    }
+    /*
+      If the chunk was allocated via mmap, release via munmap()
+      Note that if HAVE_MMAP is false but chunk_is_mmapped is
+      true, then user must have overwritten memory. There's nothing
+      we can do to catch this error unless DEBUG is set, in which case
+      check_inuse_chunk (above) will have triggered error.
+    */
+
+    else {
+      int ret;
+      INTERNAL_SIZE_T offset = (INTERNAL_SIZE_T) p->hash_next;
+      av->n_mmaps--;
+      av->mmapped_mem -= (size + offset);
+      ret = munmap((char*) chunk(p) - offset, size + offset);
+      hashtable_remove_mmapped(chunk(p));
+      freecilst_add(p);
+      /* munmap returns non-zero on failure */
+      assert(ret == 0);
+    }
+  }
+}
+
+/*
+  ------------------------- malloc_consolidate -------------------------
+
+  malloc_consolidate is a specialized version of free() that tears
+  down chunks held in fastbins.  Free itself cannot be used for this
+  purpose since, among other things, it might place chunks back onto
+  fastbins.  So, instead, we need to use a minor variant of the same
+  code.
+  
+  Also, because this routine needs to be called the first time through
+  malloc anyway, it turns out to be the perfect place to trigger
+  initialization code.
+*/
+
+#if __STD_C
+static void malloc_consolidate(mstate av)
+#else
+static void malloc_consolidate(av) mstate av;
+#endif
+{
+  mfastbinptr*    fb;                 /* current fastbin being consolidated */
+  mfastbinptr*    maxfb;              /* last fastbin (for loop control) */
+  chunkinfoptr       p;                  /* current chunk being consolidated */
+  chunkinfoptr       nextp;              /* next chunk to consolidate */
+  chunkinfoptr       prevp;
+  chunkinfoptr       unsorted_bin;       /* bin header */
+  chunkinfoptr       first_unsorted;     /* chunk to link to */
+
+  /* These have same use as in free() */
+  chunkinfoptr       nextchunk;
+  INTERNAL_SIZE_T size;
+  INTERNAL_SIZE_T nextsize;
+  INTERNAL_SIZE_T prevsize;
+  chunkinfoptr       bck;
+  chunkinfoptr       fwd;
+  chunkinfoptr	     next;
+ 
+  /*
+    If max_fast is 0, we know that av hasn't
+    yet been initialized, in which case do so below
+  */
+  if (av && av->max_fast != 0) {
+    clear_fastchunks(av);
+
+    unsorted_bin = unsorted_chunks(av);
+
+    /*
+      Remove each chunk from fast bin and consolidate it, placing it
+      then in unsorted bin. Among other reasons for doing this,
+      placing in unsorted bin avoids needing to calculate actual bins
+      until malloc is sure that chunks aren't immediately going to be
+      reused anyway.
+    */
+    
+    maxfb = &(av->fastbins[fastbin_index(av->max_fast)]);
+    fb = &(av->fastbins[0]);
+    do {
+      if ( UNLIKELY((p = *fb) != 0)) {
+        *fb = 0;
+	do {
+          check_inuse_chunk(p);
+          nextp = p->fd;
+          
+          /*
+	   * Slightly streamlined version of consolidation code in free() 
+	   */
+
+          size = chunksize(p);
+          nextchunk = next_chunkinfo(p);
+
+	  /* gcc doesn't notice that it's only used if (nextchunk) */
+	  if (nextchunk)
+	    nextsize = chunksize(nextchunk);
+	  else
+	    nextsize = 0; 
+          
+	  if (!prev_inuse(p)) {
+             prevp = prev_chunkinfo(p);
+             prevsize = chunksize(prevp);
+             size += prevsize;
+#ifdef DNMALLOC_CHECKS
+	     if (inuse(prevp)) {
+		fprintf(stderr, "Dnmalloc error: trying to unlink an inuse chunk (2): %p (chunk: %p)\n This is definitely a bug, please report it to dnmalloc@fort-knox.org.\n", prevp, chunk(prevp));
+		     abort();
+	     }
+#else
+	     assert(!inuse(prevp));
+#endif
+             unlink(prevp, bck, fwd);
+             set_head(p, size | PREV_INUSE);	     
+             hashtable_skiprm(prevp,p);
+             freecilst_add(p);
+             p=prevp;
+          }
+          
+	  if (nextchunk) {
+	    if (nextchunk != av->top) {
+
+	      clear_previnuse(nextchunk);
+            
+	      if (!inuse(nextchunk)) {
+		size += nextsize;
+		unlink(nextchunk, bck, fwd);
+		set_head(p, size | PREV_INUSE);
+		hashtable_skiprm(p,nextchunk);
+		freecilst_add(nextchunk);
+	      }
+	      
+	      first_unsorted = unsorted_bin->fd;
+	      unsorted_bin->fd = p;
+	      first_unsorted->bk = p;
+	      
+	      set_head(p, size | PREV_INUSE);
+	      p->bk = unsorted_bin;
+	      p->fd = first_unsorted;
+	      next = next_chunkinfo(p);
+	      if (next)
+	    	next->prev_size = size;
+
+            
+	    }
+          
+	    else {
+	      size += nextsize;
+	      set_head(p, size | PREV_INUSE);
+	      hashtable_remove(chunk(av->top));
+	      freecilst_add(av->top);
+	      av->top = p;
+	    }
+	  }
+          
+        } while ( (p = nextp) != 0);
+        
+      }
+    } while (fb++ != maxfb);
+  }
+  else {
+    // Initialize dnmalloc
+    dnmalloc_init();
+    malloc_init_state(get_malloc_state());
+    check_malloc_state();
+  }
+}
+
+/*
+  ------------------------------ realloc ------------------------------
+*/
+
+
+#if __STD_C
+DL_STATIC Void_t* rEALLOc(Void_t* oldmem, size_t bytes)
+#else
+DL_STATIC Void_t* rEALLOc(oldmem, bytes) Void_t* oldmem; size_t bytes;
+#endif
+{
+  mstate av = get_malloc_state();
+
+  INTERNAL_SIZE_T  nb;              /* padded request size */
+
+  chunkinfoptr        oldp;            /* chunk corresponding to oldmem */
+  INTERNAL_SIZE_T  oldsize;         /* its size */
+
+  chunkinfoptr        newp;            /* chunk to return */
+  INTERNAL_SIZE_T  newsize;         /* its size */
+  Void_t*          newmem;          /* corresponding user mem */
+
+  chunkinfoptr        next;            /* next contiguous chunk after oldp */
+
+  chunkinfoptr        remainder;       /* extra space at end of newp */
+  CHUNK_SIZE_T     remainder_size;  /* its size */
+
+  chunkinfoptr        bck;             /* misc temp for linking */
+  chunkinfoptr        fwd;             /* misc temp for linking */
+
+  CHUNK_SIZE_T     copysize;        /* bytes to copy */
+  unsigned int     ncopies;         /* INTERNAL_SIZE_T words to copy */
+  INTERNAL_SIZE_T* s;               /* copy source */ 
+  INTERNAL_SIZE_T* d;               /* copy destination */
+
+  
+#ifdef REALLOC_ZERO_BYTES_FREES
+  if (UNLIKELY(bytes == 0)) {
+    fREe(oldmem);
+    return 0;
+  }
+#endif
+
+  if (UNLIKELY(!av || av->max_fast == 0)) {
+    malloc_consolidate(av);
+    av = get_malloc_state();
+  }
+
+  /* realloc of null is supposed to be same as malloc */
+  if (UNLIKELY(oldmem == 0)) 
+    return mALLOc(bytes);
+
+  checked_request2size(bytes, nb);
+
+  oldp    = hashtable_lookup(oldmem);
+  
+  if (UNLIKELY(!oldp || !inuse(oldp))){ 
+     /* attempt to either realloc memory not managed by us 
+      * or memory that is not in use 
+      */
+#ifdef DNMALLOC_CHECKS
+    if (oldp) {
+      fprintf(stderr, "Attempt to free memory not in use\n");
+      abort();
+    } else {
+      fprintf(stderr, "Attempt to free memory not allocated\n");
+      abort();
+    }
+#endif
+    assert(oldp && inuse(oldp));
+    return 0;     
+  }
+
+  guard_check(av->guard_stored, oldp);
+
+  oldsize = chunksize(oldp);
+
+  check_inuse_chunk(oldp);
+
+  if (!chunk_is_mmapped(oldp)) {
+
+    if (UNLIKELY((CHUNK_SIZE_T)(oldsize) >= (CHUNK_SIZE_T)(nb))) {
+      /* already big enough; split below */
+      newp    = oldp;
+      newsize = oldsize;
+    }
+
+    else {
+      next = next_chunkinfo(oldp);
+      if (next)
+      	next->prev_size = oldsize;
+      /* Try to expand forward into top */
+      if (next && next == av->top &&
+          (CHUNK_SIZE_T)(newsize = oldsize + chunksize(next)) >=
+          (CHUNK_SIZE_T)(nb + MINSIZE)) {
+         set_head_size(oldp, nb);
+         hashtable_remove(chunk(av->top));
+         av->top->chunk = chunk_at_offset(chunk(oldp), nb);
+         set_head(av->top, (newsize - nb) | PREV_INUSE);
+         /* av->top->chunk has moved; update its entry in the hashtable */
+         hashtable_insert(oldp, av->top);
+	 guard_set(av->guard_stored, oldp, bytes, nb);
+         return chunk(oldp);
+      }
+      
+      /* Try to expand forward into next chunk;  split off remainder below */
+      else if (next && next != av->top && 
+               !inuse(next) &&
+               (CHUNK_SIZE_T)(newsize = oldsize + chunksize(next)) >=
+               (CHUNK_SIZE_T)(nb)) {
+        newp = oldp;
+        unlink(next, bck, fwd);
+        hashtable_remove(chunk(next));
+        freecilst_add(next);
+	next = next_chunkinfo(oldp);
+	if (next)
+	  next->prev_size = newsize;
+      }
+
+      /* allocate, copy, free */
+      else {
+
+        newmem = mALLOc(nb - MALLOC_ALIGN_MASK);
+        if (newmem == 0)
+          return 0; /* propagate failure */
+
+        newp = hashtable_lookup(newmem);
+        newsize = chunksize(newp);
+	
+        /* next = next_chunkinfo(oldp); *//* 'next' never used rw 19.05.2008 */
+        /*
+          Avoid copy if newp is next chunk after oldp.
+        */
+	if (UNLIKELY(is_next_chunk(oldp, newp))) {
+	  newsize += oldsize;
+	  set_head_size(oldp, newsize);
+	  hashtable_skiprm(oldp, newp);
+	  freecilst_add(newp);	  
+          newp = oldp;
+        }
+        else {
+          /*
+            Unroll copy of <= 40 bytes (80 if 8byte sizes)
+            We know that contents have an even number of
+            INTERNAL_SIZE_T-sized words; minimally 4 (2 on amd64).
+          */
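+          /* e.g. copysize 32 with 8-byte words gives ncopies 4: the
+             two unconditional copies below plus the ncopies > 2 pair */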
+          
+          copysize = oldsize;
+          s = (INTERNAL_SIZE_T*)(oldmem);
+          d = (INTERNAL_SIZE_T*)(newmem);
+          ncopies = copysize / sizeof(INTERNAL_SIZE_T);
+          assert(ncopies >= 2);
+          
+          if (ncopies > 10)
+            MALLOC_COPY(d, s, copysize);
+          
+          else {
+            *(d+0) = *(s+0);
+            *(d+1) = *(s+1);
+	    if (ncopies > 2) {
+	      *(d+2) = *(s+2);
+	      *(d+3) = *(s+3);
+	      if (ncopies > 4) {
+		*(d+4) = *(s+4);
+		*(d+5) = *(s+5);
+		if (ncopies > 6) {
+		  *(d+6) = *(s+6);
+		  *(d+7) = *(s+7);
+		  if (ncopies > 8) {
+		    *(d+8) = *(s+8);
+		    *(d+9) = *(s+9);
+		  }
+                }
+              }
+            }
+          }
+          
+          fREe(oldmem);
+          check_inuse_chunk(newp);
+	  guard_set(av->guard_stored, newp, bytes, nb);
+          return chunk(newp);
+        }
+      }
+    }
+
+    /* If possible, free extra space in old or extended chunk */
+
+    assert((CHUNK_SIZE_T)(newsize) >= (CHUNK_SIZE_T)(nb));
+
+    remainder_size = newsize - nb;
+
+    if (remainder_size >= MINSIZE) { /* split remainder */
+      remainder = cireg_getfree();
+      remainder->chunk = chunk_at_offset(chunk(newp), nb);
+      set_head_size(newp, nb);
+      set_head(remainder, remainder_size | PREV_INUSE | INUSE);
+      remainder->prev_size = nb;
+      hashtable_insert(newp, remainder);
+      /* Mark remainder as inuse so free() won't complain */
+      set_all_inuse(remainder);
+      guard_set(av->guard_stored, remainder, 0, remainder_size);
+      fREe(chunk(remainder)); 
+    }
+    else { /* not enough extra to split off */
+      set_head_size(newp, newsize);
+      set_all_inuse(newp);
+    }
+
+    check_inuse_chunk(newp);
+    guard_set(av->guard_stored, newp, bytes, nb);
+    return chunk(newp);
+  }
+
+  /*
+    Handle mmap cases
+  */
+
+  else {
+
+#if HAVE_MREMAP
+    INTERNAL_SIZE_T offset = (INTERNAL_SIZE_T) oldp->hash_next;
+    size_t pagemask = av->pagesize - 1;
+    char *cp;
+    CHUNK_SIZE_T  sum;
+    
+    /* Note the extra SIZE_SZ overhead */
+    //newsize = (nb + offset + SIZE_SZ + pagemask) & ~pagemask;
+    newsize = (nb + offset + pagemask) & ~pagemask;
+
+    /* no need to remap if the page-rounded size is unchanged */
+    if (oldsize == newsize - offset)
+      {
+	guard_set(av->guard_stored, oldp, bytes, nb);
+	return oldmem;
+      }
+
+    cp = (char*)mremap((char*)chunk(oldp) - offset, oldsize + offset, newsize, 1);
+    
+    if (cp != (char*)MORECORE_FAILURE) {
+       
+      hashtable_remove_mmapped(chunk(oldp));
+       
+      oldp->chunk = (mchunkptr)(cp + offset);
+      set_head(oldp, (newsize - offset)|IS_MMAPPED|INUSE);
+      
+      hashtable_add(oldp);
+      
+      assert(aligned_OK(chunk(oldp))); /* rw fix: newp -> oldp */
+      assert(( ((INTERNAL_SIZE_T) oldp->hash_next) == offset));
+      
+      /* update statistics */
+      sum = av->mmapped_mem += newsize - oldsize;
+      if (sum > (CHUNK_SIZE_T)(av->max_mmapped_mem)) 
+        av->max_mmapped_mem = sum;
+      sum += av->sbrked_mem;
+      if (sum > (CHUNK_SIZE_T)(av->max_total_mem)) 
+        av->max_total_mem = sum;
+      
+      guard_set(av->guard_stored, oldp, bytes, nb);
+      return chunk(oldp);
+    }
+#endif /* have MREMAP */
+
+    /* Note the extra SIZE_SZ overhead. */
+    if ((CHUNK_SIZE_T)(oldsize) >= (CHUNK_SIZE_T)(nb + SIZE_SZ)) 
+      newmem = oldmem; /* do nothing */
+    else {
+      /* Must alloc, copy, free. */
+      newmem = mALLOc(nb - MALLOC_ALIGN_MASK);
+      if (newmem != 0) {
+        MALLOC_COPY(newmem, oldmem, oldsize);
+        fREe(oldmem);
+      }
+    }
+    guard_set(av->guard_stored, mem2chunk(newmem), bytes, nb);
+    return newmem;
+  }
+}
+
+/*
+  ---------------------------posix_memalign ----------------------------
+*/
+
+#if __STD_C
+DL_STATIC int posix_mEMALIGn(Void_t** memptr, size_t alignment, size_t bytes)
+#else
+DL_STATIC int posix_mEMALIGn(memptr, alignment, bytes) Void_t** memptr; size_t alignment; size_t bytes;
+#endif
+{
+  mstate av;
+
+  if (alignment % sizeof(void *) != 0)
+    return EINVAL;
+  if ((alignment & (alignment - 1)) != 0)
+    return EINVAL;
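+  /* e.g. alignment 16 or 64 passes both checks; 24 is rejected (not a
+     power of two), as is 2 on platforms where sizeof(void *) is 8 */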
+
+  av = get_malloc_state();
+  if (!av || av->max_fast == 0) malloc_consolidate(av);
+  *memptr =  mEMALIGn(alignment, bytes);
+
+  return (*memptr != NULL ? 0 : ENOMEM);
+}
+
+/*
+  ------------------------------ memalign ------------------------------
+*/
+
+#if __STD_C
+DL_STATIC Void_t* mEMALIGn(size_t alignment, size_t bytes)
+#else
+DL_STATIC Void_t* mEMALIGn(alignment, bytes) size_t alignment; size_t bytes;
+#endif
+{
+  INTERNAL_SIZE_T nb;             /* padded  request size */
+  char*           m;              /* memory returned by malloc call */
+  chunkinfoptr       p;              /* corresponding chunk */
+  char*           brk;            /* alignment point within p */
+  chunkinfoptr       newp;           /* chunk to return */
+  INTERNAL_SIZE_T newsize;        /* its size */
+  INTERNAL_SIZE_T leadsize;       /* leading space before alignment point */
+  chunkinfoptr       remainder;      /* spare room at end to split off */
+  CHUNK_SIZE_T    remainder_size; /* its size */
+  INTERNAL_SIZE_T size;
+  mstate          av;
+
+  /* If need less alignment than we give anyway, just relay to malloc */
+
+  if (UNLIKELY(alignment <= MALLOC_ALIGNMENT)) return mALLOc(bytes);
+
+  /* Otherwise, ensure that it is at least a minimum chunk size */
+
+  if (alignment <  MINSIZE) alignment = MINSIZE;
+
+  /* Make sure alignment is power of 2 (in case MINSIZE is not).  */
+  if (UNLIKELY((alignment & (alignment - 1)) != 0)) {
+    size_t a = MALLOC_ALIGNMENT * 2;
+    while ((CHUNK_SIZE_T)a < (CHUNK_SIZE_T)alignment) a <<= 1;
+    alignment = a;
+  }
+
+  checked_request2size(bytes, nb);
+
+  /*
+    Strategy: allocate a chunk with worst-case padding, find a spot
+    within it that meets the alignment request, and then possibly free
+    the leading and trailing space.
+  */
+
+
+  /* Call malloc with worst case padding to hit alignment. */
+
+  m  = (char*)(mALLOc(nb + alignment + MINSIZE));
+
+  if (m == 0) return 0; /* propagate failure */
+
+  av = get_malloc_state();
+
+  p = hashtable_lookup((mchunkptr) m);
+
+  if ((((PTR_UINT)(m)) % alignment) != 0) { /* misaligned */
+
+    /*
+      Find an aligned spot inside chunk.  Since we need to give back
+      leading space in a chunk of at least MINSIZE, if the first
+      calculation places us at a spot with less than MINSIZE leader,
+      we can move to the next aligned spot -- we've allocated enough
+      total room so that this is always possible.
+    */
+
+    brk = (char*) ((PTR_UINT)(((PTR_UINT)(m + alignment - 1)) &
+                           -((signed long) alignment)));
+    if ((CHUNK_SIZE_T)(brk - (char*)(chunk(p))) < MINSIZE)
+      brk += alignment;
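+    /* e.g. m == 0x1010 with alignment 0x100 aligns brk up to 0x1100;
+       had the leader been smaller than MINSIZE, brk would move on to
+       0x1200 instead */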
+
+    newp = cireg_getfree();
+    newp->chunk = (mchunkptr)brk;
+    leadsize = brk - (char*)(chunk(p));
+    newsize = chunksize(p) - leadsize;
+
+    /* For mmapped chunks, just adjust offset */
+    if (UNLIKELY(chunk_is_mmapped(p))) {
+      newp->hash_next = (chunkinfoptr) (((INTERNAL_SIZE_T) p->hash_next) + leadsize);
+      set_head(newp, newsize|IS_MMAPPED|INUSE);
+      hashtable_remove_mmapped(chunk(p));
+      freecilst_add(p);
+      hashtable_add(newp);
+      guard_set(av->guard_stored, newp, bytes, nb);
+      return chunk(newp);
+    }
+
+    /* Otherwise, give back leader, use the rest */
+    set_head(newp, newsize | PREV_INUSE | INUSE);
+    set_head_size(p, leadsize);
+    set_all_inuse(newp);
+    hashtable_add(newp); /* 20.05.2008 rw */
+    guard_set(av->guard_stored, p, 0, leadsize);
+    fREe(chunk(p));
+    p = newp;
+
+    assert (newsize >= nb &&
+            (((PTR_UINT)(chunk(p))) % alignment) == 0);
+  }
+
+  /* Also give back spare room at the end */
+  if (!chunk_is_mmapped(p)) {
+    size = chunksize(p);
+    if ((CHUNK_SIZE_T)(size) > (CHUNK_SIZE_T)(nb + MINSIZE)) {
+       remainder = cireg_getfree();
+       remainder_size = size - nb;
+       remainder->chunk = chunk_at_offset(chunk(p), nb);
+       set_head(remainder, remainder_size | PREV_INUSE | INUSE);
+       set_head_size(p, nb);
+       hashtable_add(remainder); /* 20.05.2008 rw */
+       guard_set(av->guard_stored, remainder, 0, remainder_size);
+       fREe(chunk(remainder));
+    }
+  }
+
+  check_inuse_chunk(p);
+  guard_set(av->guard_stored, p, bytes, nb);
+  return chunk(p);
+}
+
+/*
+  ------------------------------ calloc ------------------------------
+*/
+
+#if __STD_C
+DL_STATIC Void_t* cALLOc(size_t n_elements, size_t elem_size)
+#else
+DL_STATIC Void_t* cALLOc(n_elements, elem_size) size_t n_elements; size_t elem_size;
+#endif
+{
+  chunkinfoptr p;
+  CHUNK_SIZE_T  clearsize;
+  CHUNK_SIZE_T  nclears;
+  INTERNAL_SIZE_T* d;
+  Void_t* mem;
+ 
+  
+  /* guard against overflow in n_elements * elem_size */
+  if (n_elements != 0 && elem_size > ((size_t)-1) / n_elements) {
+    MALLOC_FAILURE_ACTION;
+    return 0;
+  }
+  mem = mALLOc(n_elements * elem_size);
+
+  if (mem != 0) {
+    p = hashtable_lookup(mem);
+
+    if (!chunk_is_mmapped(p))
+    {  
+      /*
+        Unroll clear of <= 40 bytes (80 if 8byte sizes)
+        We know that contents have an even number of
+        INTERNAL_SIZE_T-sized words; minimally 4 (2 on amd64).
+      */
+
+      d = (INTERNAL_SIZE_T*)mem;
+      clearsize = chunksize(p);
+      nclears = clearsize / sizeof(INTERNAL_SIZE_T);
+      assert(nclears >= 2);
+
+      if (nclears > 10) {
+        MALLOC_ZERO(d, clearsize);
+      }
+
+      else {
+        *(d+0) = 0;
+        *(d+1) = 0;
+	if (nclears > 2) {
+	  *(d+2) = 0;
+	  *(d+3) = 0;
+	  if (nclears > 4) {
+	    *(d+4) = 0;
+	    *(d+5) = 0;
+	    if (nclears > 6) {
+	      *(d+6) = 0;
+	      *(d+7) = 0;
+	      if (nclears > 8) {
+		*(d+8) = 0;
+		*(d+9) = 0;
+	      }
+            }
+          }
+        }
+      }
+    }
+#if ! MMAP_CLEARS
+    else
+    {
+      d = (INTERNAL_SIZE_T*)mem;
+      clearsize = chunksize(p);
+      MALLOC_ZERO(d, clearsize);
+    }
+#endif
+    /* Set guard again, since we just cleared it
+     */
+    guard_set(get_malloc_state()->guard_stored, p, (n_elements * elem_size), p->size);
+  }
+
+  return mem;
+}
+
+/*
+  ------------------------------ valloc ------------------------------
+*/
+
+#if __STD_C
+DL_STATIC Void_t* vALLOc(size_t bytes)
+#else
+DL_STATIC Void_t* vALLOc(bytes) size_t bytes;
+#endif
+{
+  /* Ensure initialization */
+  mstate av = get_malloc_state();
+  if (!av || av->max_fast == 0) {
+    malloc_consolidate(av);
+    av = get_malloc_state();
+  }
+  return mEMALIGn(av->pagesize, bytes);
+}
+
+/*
+  ------------------------------ pvalloc ------------------------------
+*/
+
+
+#if __STD_C
+DL_STATIC Void_t* pVALLOc(size_t bytes)
+#else
+DL_STATIC Void_t* pVALLOc(bytes) size_t bytes;
+#endif
+{
+  mstate av = get_malloc_state();
+  size_t pagesz;
+
+  /* Ensure initialization */
+  if (!av || av->max_fast == 0) {
+    malloc_consolidate(av);
+    av = get_malloc_state();
+  }
+  pagesz = av->pagesize;
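+  /* round the request up to a whole number of pages: e.g. with 4 KiB
+     pages, 1 byte becomes 4096 and 5000 becomes 8192 */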
+  return mEMALIGn(pagesz, (bytes + pagesz - 1) & ~(pagesz - 1));
+}
+   
+
+/*
+  ------------------------------ malloc_trim ------------------------------
+*/
+
+#if __STD_C
+DL_STATIC int mTRIm(size_t pad)
+#else
+DL_STATIC int mTRIm(pad) size_t pad;
+#endif
+{
+  mstate av = get_malloc_state();
+  /* Ensure initialization/consolidation */
+  malloc_consolidate(av);
+  av = get_malloc_state();
+#ifndef MORECORE_CANNOT_TRIM
+  if (morecore32bit(av))
+    return sYSTRIm(pad, av);
+  else
+    return 0;
+#else
+  return 0;
+#endif
+}
+
+
+
+/*
+  ------------------------- malloc_usable_size -------------------------
+*/
+
+#if __STD_C
+DL_STATIC size_t mUSABLe(Void_t* mem)
+#else
+DL_STATIC size_t mUSABLe(mem) Void_t* mem;
+#endif
+{
+  chunkinfoptr p;
+  if (mem != 0) {
+    p = hashtable_lookup(mem);
+    if (p && inuse(p)) return chunksize(p);
+  }
+  return 0;
+}
+
+/*
+  ------------------------------ mallinfo ------------------------------
+*/
+
+DL_STATIC struct mallinfo mALLINFo()
+{
+  mstate av = get_malloc_state();
+  struct mallinfo mi;
+  unsigned int i;
+  mbinptr b;
+  chunkinfoptr p;
+  INTERNAL_SIZE_T avail;
+  INTERNAL_SIZE_T fastavail;
+  int nblocks;
+  int nfastblocks;
+
+  /* Ensure initialization */
+  if (!av || av->top == 0) {
+    malloc_consolidate(av);
+    av = get_malloc_state();
+  }
+  check_malloc_state();
+
+  /* Account for top */
+  avail = chunksize(av->top);
+  nblocks = 1;  /* top always exists */
+
+  /* traverse fastbins */
+  nfastblocks = 0;
+  fastavail = 0;
+
+  for (i = 0; i < NFASTBINS; ++i) {
+    for (p = av->fastbins[i]; p != 0; p = p->fd) {
+      ++nfastblocks;
+      fastavail += chunksize(p);
+    }
+  }
+
+  avail += fastavail;
+
+  /* traverse regular bins */
+  for (i = 1; i < NBINS; ++i) {
+    b = bin_at(av, i);
+    for (p = last(b); p != b; p = p->bk) {
+      ++nblocks;
+      avail += chunksize(p);
+    }
+  }
+
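+  /* map the gathered totals onto the SVID/XPG mallinfo fields */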
+  mi.smblks = nfastblocks;
+  mi.ordblks = nblocks;
+  mi.fordblks = avail;
+  mi.uordblks = av->sbrked_mem - avail;
+  mi.arena = av->sbrked_mem;
+  mi.hblks = av->n_mmaps;
+  mi.hblkhd = av->mmapped_mem;
+  mi.fsmblks = fastavail;
+  mi.keepcost = chunksize(av->top);
+  mi.usmblks = av->max_total_mem;
+  return mi;
+}
+
+/*
+  ------------------------------ malloc_stats ------------------------------
+*/
+
+DL_STATIC void mSTATs()
+{
+  struct mallinfo mi = mALLINFo();
+
+  fprintf(stderr, "hashtable = %10lu MB\n", 
+	  (CHUNK_SIZE_T)(HASHTABLESIZE / (1024*1024)));
+  fprintf(stderr, "max system bytes = %10lu\n",
+          (CHUNK_SIZE_T)(mi.usmblks));
+  fprintf(stderr, "system bytes     = %10lu  (%10lu sbrked, %10lu mmaped)\n",
+          (CHUNK_SIZE_T)(mi.arena + mi.hblkhd),
+          (CHUNK_SIZE_T)(mi.arena),
+          (CHUNK_SIZE_T)(mi.hblkhd));
+  fprintf(stderr, "in use bytes     = %10lu\n",
+          (CHUNK_SIZE_T)(mi.uordblks + mi.hblkhd));
+
+}
+
+
+/*
+  ------------------------------ mallopt ------------------------------
+*/
+
+#if __STD_C
+DL_STATIC int mALLOPt(int param_number, int value)
+#else
+DL_STATIC int mALLOPt(param_number, value) int param_number; int value;
+#endif
+{
+  mstate av = get_malloc_state();
+  /* Ensure initialization/consolidation */
+  malloc_consolidate(av);
+  av = get_malloc_state();
+
+  switch(param_number) {
+  case M_MXFAST:
+    if (value >= 0 && value <= MAX_FAST_SIZE) {
+      set_max_fast(av, value);
+      return 1;
+    }
+    else
+      return 0;
+
+  case M_TRIM_THRESHOLD:
+    av->trim_threshold = value;
+    return 1;
+
+  case M_TOP_PAD:
+    av->top_pad = value;
+    return 1;
+
+  case M_MMAP_THRESHOLD:
+    av->mmap_threshold = value;
+    return 1;
+
+  case M_MMAP_MAX:
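+    /* only 0 is accepted here: mmap use can be disabled, but a
+     * nonzero limit cannot be set */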
+    if (value != 0)
+      return 0;
+    av->n_mmaps_max = value;
+    return 1;
+
+  default:
+    return 0;
+  }
+}
+
+
+/*	$OpenBSD: arc4random.c,v 1.19 2008/06/04 00:50:23 djm Exp $	*/
+
+/*
+ * Copyright (c) 1996, David Mazieres <dm@uun.org>
+ * Copyright (c) 2008, Damien Miller <djm@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Arc4 random number generator for OpenBSD.
+ *
+ * This code is derived from section 17.1 of Applied Cryptography,
+ * second edition, which describes a stream cipher allegedly
+ * compatible with RSA Labs "RC4" cipher (the actual description of
+ * which is a trade secret).  The same algorithm is used as a stream
+ * cipher called "arcfour" in Tatu Ylonen's ssh package.
+ *
+ * Here the stream cipher has been modified always to include the time
+ * when initializing the state.  That makes it impossible to
+ * regenerate the same random sequence twice, so this can't be used
+ * for encryption, but will generate good random numbers.
+ *
+ * RC4 is a registered trademark of RSA Laboratories.
+ */
+
+/* Moved u_int8_t -> unsigned char (portability)
+ * Eliminated unneeded functions, added read from /dev/urandom taken from:
+ $MirOS: contrib/code/Snippets/arc4random.c,v 1.3 2008-03-04 22:53:14 tg Exp $
+ * Modified by Robert Connolly from OpenBSD lib/libc/crypt/arc4random.c v1.11.
+ * This is arc4random(3) using urandom.
+ */
+
+#include <fcntl.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <sys/param.h>
+#include <sys/time.h>
+
+struct arc4_stream {
+	unsigned char i;
+	unsigned char j;
+	unsigned char s[256];
+};
+
+static int rs_initialized;
+static struct arc4_stream rs;
+static pid_t arc4_stir_pid;
+static int arc4_count;
+
+static unsigned char arc4_getbyte(void);
+
+static void
+arc4_init(void)
+{
+	int     n;
+
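+	/* start from the identity permutation; arc4_addrandom() mixes
+	 * in the actual key material afterwards */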
+	for (n = 0; n < 256; n++)
+		rs.s[n] = n;
+	rs.i = 0;
+	rs.j = 0;
+}
+
+static inline void
+arc4_addrandom(unsigned char *dat, int datlen)
+{
+	int     n;
+	unsigned char si;
+
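+	/* RC4-style key schedule: one pass over the state, folding the
+	 * key bytes in dat[] into the permutation */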
+	rs.i--;
+	for (n = 0; n < 256; n++) {
+		rs.i = (rs.i + 1);
+		si = rs.s[rs.i];
+		rs.j = (rs.j + si + dat[n % datlen]);
+		rs.s[rs.i] = rs.s[rs.j];
+		rs.s[rs.j] = si;
+	}
+	rs.j = rs.i;
+}
+
+#ifdef HAVE_SCHED_H
+#include <sched.h>
+#endif
+
+static void
+arc4_stir(void)
+{
+	int     i;
+        struct {
+                struct timeval tv1;
+                struct timeval tv2;
+                u_int rnd[(128 - 2*sizeof(struct timeval)) / sizeof(u_int)];
+        } rdat;
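+        /* entropy record: timestamps taken before and after the
+         * urandom read, padded with random bytes to 128 bytes total */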
+#if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__)
+        size_t sz = 0;
+	int    fd;
+#endif
+
+        gettimeofday(&rdat.tv1, NULL);
+
+
+	if (!rs_initialized) {
+		arc4_init();
+		rs_initialized = 1;
+	}
+
+#if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__)
+
+#ifdef HAVE_SCHED_YIELD
+	/* Yield the processor to introduce some random delay. */
+	(void) sched_yield();
+#endif
+
+	/*
+	 * Skip the /dev/urandom read on *BSD: opening it from this
+	 * context has caused pthread problems in multithreaded code.
+	 */
+        fd = open("/dev/urandom", O_RDONLY);
+        if (fd != -1) {
+                sz = (size_t)read(fd, rdat.rnd, sizeof (rdat.rnd));
+                close(fd);
+        }
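+        /* a failed read(2) returns -1, which the size_t cast turns
+         * into SIZE_MAX; discard any implausible byte count */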
+        if (sz > sizeof (rdat.rnd))
+                sz = 0;
+#endif
+
+	arc4_stir_pid = getpid();
+        gettimeofday(&rdat.tv2, NULL);
+
+        arc4_addrandom((void *)&rdat, sizeof(rdat));
+
+	/*
+	 * Discard early keystream, as per recommendations in:
+	 * http://www.wisdom.weizmann.ac.il/~itsik/RC4/Papers/Rc4_ksa.ps
+	 */
+	for (i = 0; i < 256; i++)
+		(void)arc4_getbyte();
+	arc4_count = 1600000;
+}
+
+static unsigned char
+arc4_getbyte(void)
+{
+	unsigned char si, sj;
+
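+	/* one RC4 PRGA step: swap s[i] and s[j], then emit
+	 * s[(s[i] + s[j]) & 0xff] */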
+	rs.i = (rs.i + 1);
+	si = rs.s[rs.i];
+	rs.j = (rs.j + si);
+	sj = rs.s[rs.j];
+	rs.s[rs.i] = sj;
+	rs.s[rs.j] = si;
+	return (rs.s[(si + sj) & 0xff]);
+}
+
+
+/* Changed to return char* */
+static char *
+dnmalloc_arc4random(void)
+{
+	static char val[4];
+	
+	/* We only call this once, hence no need for locking. */
+
+	/* _ARC4_LOCK(); */
+	arc4_count -= 4;
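+	/* re-seed when the keystream budget is spent, the generator was
+	 * never initialized, or the process has forked (pid changed) */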
+	if (arc4_count <= 0 || !rs_initialized || arc4_stir_pid != getpid())
+		arc4_stir();
+
+	val[0] = (char) arc4_getbyte();
+	val[1] = (char) arc4_getbyte();
+	val[2] = (char) arc4_getbyte();
+	val[3] = (char) arc4_getbyte();
+
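+	/* stir again so that the bytes just returned cannot be
+	 * reconstructed from the state left behind in memory */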
+	arc4_stir();
+	/* _ARC4_UNLOCK(); */
+	return val;
+}
+
+#else
+int dnmalloc_pthread_init() { return 0; }
+#endif /* ! USE_SYSTEM_MALLOC */
Index: trunk/src/make-tests.sh
===================================================================
--- trunk/src/make-tests.sh	(revision 170)
+++ trunk/src/make-tests.sh	(revision 171)
@@ -57,4 +57,8 @@
 int main(void)
 {
+#if !defined(USE_SYSTEM_MALLOC) && defined(USE_MALLOC_LOCK)
+    extern int dnmalloc_pthread_init(void);
+    dnmalloc_pthread_init();
+#endif
     int retval;
     retval = RunAllTests();
Index: trunk/src/samhain.c
===================================================================
--- trunk/src/samhain.c	(revision 170)
+++ trunk/src/samhain.c	(revision 171)
@@ -169,4 +169,9 @@
 void sh_g_init(void)
 {
+#if !defined(USE_SYSTEM_MALLOC) && defined(USE_MALLOC_LOCK)
+  extern int dnmalloc_pthread_init(void);
+  dnmalloc_pthread_init();
+#endif
+
   if (0 != pthread_key_create(&g_key, sh_g_destroy))
     {
@@ -2152,4 +2157,13 @@
 #endif
 
+#if 0
+  {
+    char command[128];
+    sprintf(command, "/bin/cat /proc/%d/status", (int) getpid());
+    system(command); /* flawfinder: ignore *//* debug code */
+    malloc_stats();
+  }
+#endif
+
   aud_exit (FIL__, __LINE__, EXIT_SUCCESS);
   SL_RETURN(0, _("main"));
Index: trunk/src/samhain_setpwd.c
===================================================================
--- trunk/src/samhain_setpwd.c	(revision 170)
+++ trunk/src/samhain_setpwd.c	(revision 171)
@@ -8,4 +8,5 @@
 #include <unistd.h>
 #include <sys/types.h>
+#include <signal.h>
 #include <sys/wait.h>
 #include <sys/stat.h>
@@ -70,5 +71,5 @@
     }
 
-  while (nbytes) {
+  do {
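+    /* restart the read(2) if it is interrupted by a signal, and
+     * keep reading until the requested byte count is complete */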
     count = read(fd, &buf[where], nbytes);
     if (count == -1 && errno == EINTR)
@@ -76,5 +77,5 @@
     where  += count;
     nbytes -= count;
-  } while (count == -1 && errno == EINTR);
+  } while (nbytes);
 
   close(fd);
Index: trunk/src/sh_calls.c
===================================================================
--- trunk/src/sh_calls.c	(revision 170)
+++ trunk/src/sh_calls.c	(revision 171)
@@ -183,5 +183,5 @@
 	val_retry = 
 	  /*@-unrecog@*/connect(sockfd, serv_addr, addrlen)/*@+unrecog@*/;
-      } while (val_retry < 0 && errno == EINTR);
+      } while (val_retry < 0 && (errno == EINTR || errno == EINPROGRESS));
     }
 
Index: trunk/src/sh_entropy.c
===================================================================
--- trunk/src/sh_entropy.c	(revision 170)
+++ trunk/src/sh_entropy.c	(revision 171)
@@ -147,4 +147,5 @@
         struct sockaddr_un addr;
         int addr_len;
+	int retval;
 
 #ifdef EGD_SOCKET_NAME
@@ -184,5 +185,8 @@
 	    SL_RETURN( -1, _("sh_entropy") );
 	  }
-        if( connect( fd, (struct sockaddr*)&addr, addr_len) == -1 )
+	do {
+	  retval = connect(fd, (struct sockaddr *) &addr, addr_len);
+	} while (retval < 0 && (errno == EINTR || errno == EINPROGRESS));
+        if( retval == -1 )
 	  {
 	    myerror = errno;
@@ -682,4 +686,5 @@
 	status = -1;
       }
+#if !defined(USE_UNO)
     else if (WIFSIGNALED(status))
       {
@@ -692,4 +697,5 @@
 	status = -1;
       }
+#endif
 
     source->pipe = NULL;
Index: trunk/src/sh_files.c
===================================================================
--- trunk/src/sh_files.c	(revision 170)
+++ trunk/src/sh_files.c	(revision 171)
@@ -2254,9 +2254,6 @@
 int sh_files_test_double (zAVLTree * firstList, zAVLTree * secondList)
 {
-  int          count;
   int          retval = 0;
-
   zAVLCursor   cursor;
-
   dirstack_t * first;
 
@@ -2267,5 +2264,4 @@
       if (NULL != zAVLSearch(secondList, first->name))
 	{
-	  ++count;
 	  sh_error_handle ((-1), FIL__, __LINE__, 0, MSG_FI_DOUBLE,
 			   first->name);
Index: trunk/src/sh_forward.c
===================================================================
--- trunk/src/sh_forward.c	(revision 170)
+++ trunk/src/sh_forward.c	(revision 171)
@@ -2644,5 +2644,4 @@
 			       sizeof(addr_peer.sin_addr))) 
 		break;
-	      ++i;
 	    }
 	}
@@ -4916,4 +4915,6 @@
   struct  sigaction  new_act;
   struct  sigaction  old_act;
+
+  int setsize_fd;
   
   SL_ENTER(_("sh_receive"));
@@ -4946,6 +4947,9 @@
    * The POSIX lower limit on open files seems to be eight. 
    */
-  maxconn = get_open_max() - 6;
-  maxconn = (((int)FD_SETSIZE) < maxconn) ? FD_SETSIZE : maxconn;
+  maxconn    = get_open_max() - 6;
+  /* ugly fix for FreeBSD compiler warning; casting FD_SETSIZE in the
+   * conditional expression does not suppress the warning... */
+  setsize_fd = (int)FD_SETSIZE;
+  maxconn = (setsize_fd < maxconn) ? setsize_fd : maxconn;
 
   if (maxconn < 0 || !sl_ok_muls(maxconn, sizeof(sh_conn_t)))
Index: trunk/src/sh_getopt.c
===================================================================
--- trunk/src/sh_getopt.c	(revision 170)
+++ trunk/src/sh_getopt.c	(revision 171)
@@ -315,27 +315,36 @@
 static void sh_getopt_print_log_facilities (void)
 {
-  fputs (_("Compiled-in log facilities:"), stdout);
+  int num = 0;
+
+  fputs (_("Compiled-in log facilities:\n"), stdout);
 
 #ifndef DEFAULT_CONSOLE
-  printf (_(" console (/dev/console)"));
+  if (num > 0) fputc ('\n', stdout);
+  printf (_(" console (/dev/console)")); ++num;
 #else
+  if (num > 0) fputc ('\n', stdout);
   if (0 == strcmp (DEFAULT_CONSOLE, _("NULL")))
-    printf (_(" console (/dev/console)"));
+    { printf (_("console (/dev/console)"));  ++num; }
   else
-    printf (_(" console (%s)"), DEFAULT_CONSOLE);
-#endif
-  fputs  (_(", syslog"), stdout);
-  printf (_(", logfile (%s)"), DEFAULT_ERRFILE);
+    { printf (_("console (%s)"), DEFAULT_CONSOLE);  ++num; }
+#endif
+  if (num > 0) fputc ('\n', stdout);
+  fputs  (_(" syslog"), stdout); ++num;
+  if (num > 0) fputc ('\n', stdout);
+  printf (_(" logfile (%s)"), DEFAULT_ERRFILE); ++num;
 
 #if defined(WITH_EXTERNAL)
-  fputs (_(", external program"), stdout);
+  if (num > 0) fputc ('\n', stdout);
+  fputs (_(" external program"), stdout); ++num;
 #endif
 
 #if defined(WITH_MESSAGE_QUEUE)
-  fputs (_(", message queue"), stdout);
+  if (num > 0) fputc ('\n', stdout);
+  fputs (_(" message queue"), stdout); ++num;
 #endif
  
 #if defined(WITH_DATABASE)
-  fputs (_(", database"), stdout);
+  if (num > 0) fputc ('\n', stdout);
+  fputs (_(" database"), stdout); ++num;
 #ifdef WITH_ODBC
   fputs (_(" (odbc)"), stdout);
@@ -353,126 +362,19 @@
 
 #if defined(SH_WITH_CLIENT) || defined(SH_WITH_SERVER)
-  fputs (_(", server"), stdout);
+  if (num > 0) fputc ('\n', stdout);
+  fputs (_(" server"), stdout); ++num;
 #endif
 
 #if defined(SH_WITH_MAIL)
-  fputs (_(", email"), stdout);
+  if (num > 0) fputc ('\n', stdout);
+  fputs (_(" email"), stdout); ++num;
 #endif
 
 #ifdef HAVE_LIBPRELUDE
+  if (num > 0) fputc ('\n', stdout); ++num;
 #ifdef HAVE_LIBPRELUDE_8
-  fputs (_(", prelude (0.8)"), stdout);
+  fputs (_(" prelude (0.8)"), stdout);
 #else
-  fputs (_(", prelude (0.9+)"), stdout);
-#endif
-#endif
-
-  fputc ('\n', stdout);
-  return;
-}
-
-static void sh_getopt_print_options (void)
-{
-  int num = 0;
-
-
-#if defined(SH_STANDALONE)
-  if (num > 0) fputc ('\n', stdout);
-  fputs (_("Standalone executable"), stdout); ++num;
-#endif
-#if defined(SH_WITH_CLIENT)
-  if (num > 0) fputc ('\n', stdout);
-  printf (_("Client executable (port %d)"), SH_DEFAULT_PORT); ++num;
-#endif
-#if defined(SH_WITH_CLIENT)
-  if (num > 0) fputc ('\n', stdout);
-  printf (_("Server executable (port %d, user %s)"), 
-	  SH_DEFAULT_PORT, DEFAULT_IDENT); 
-  ++num;
-#endif
-
-  fputs (_(", compiled-in options:"), stdout);
-
-#if defined(HAVE_EGD_RANDOM)
-  if (num > 0) fputc ('\n', stdout);
-  printf (_(" use entropy gathering daemon (%s)"), EGD_SOCKET_NAME); ++num;
-#endif
-#if defined(HAVE_UNIX_RANDOM)
-  if (num > 0) fputc ('\n', stdout);
-  fputs (_(" use unix entropy gatherer"), stdout); ++num;
-#endif
-#if defined(HAVE_URANDOM)
-  if (num > 0) fputc ('\n', stdout);
-  printf (_(" use entropy device (%s)"), NAME_OF_DEV_RANDOM); ++num;
-#endif
-
-#ifdef WITH_GPG
-  if (num > 0) fputc ('\n', stdout);
-  printf (_(" GnuPG signatures (%s)"), DEFAULT_GPG_PATH); ++num;
-#ifdef HAVE_GPG_CHECKSUM
-  if (num > 0) fputc ('\n', stdout);
-  printf (_("   -- GnuPG checksum:  %s"), GPG_HASH); ++num;
-#endif
-#ifdef USE_FINGERPRINT
-  if (num > 0) fputc ('\n', stdout);
-  printf (_("   -- Key fingerprint: %s"), SH_GPG_FP); ++num;
-#endif
-#endif
-
-#if defined(SL_DEBUG)
-  if (num > 0) fputc ('\n', stdout);
-  fputs (_(" debug build (don't use for production)"), stdout); ++num;
-#endif
-#if defined(SCREW_IT_UP)
-  if (num > 0) fputc ('\n', stdout);
-  fputs (_(" anti-debugger"), stdout); ++num;
-#endif
-#if defined(SH_USE_XML)
-  if (num > 0) fputc ('\n', stdout);
-  fputs (_(" xml log format"), stdout); ++num;
-#endif
-#if defined(HAVE_NTIME)
-  if (num > 0) fputc ('\n', stdout);
-  fputs (_(" use time server"), stdout); ++num;
-#endif
-
-#if defined(SH_WITH_CLIENT) || defined(SH_STANDALONE)
-#if defined(HAVE_LIBZ)
-  if (num > 0) fputc ('\n', stdout);
-  fputs (_(" optionally store full text for files"), stdout); ++num;
-#endif
-#if defined(USE_XATTR)
-  if (num > 0) fputc ('\n', stdout);
-  fputs (_(" check SELinux attributes"), stdout); ++num;
-#endif
-#if defined(USE_ACL)
-  if (num > 0) fputc ('\n', stdout);
-  fputs (_(" check Posix ACLs"), stdout); ++num;
-#endif
-#if defined(RELOAD_DATABASE)
-  if (num > 0) fputc ('\n', stdout);
-  fputs (_(" fetch database on reload"), stdout); ++num;
-#endif
-#endif
-
-#if defined(SH_WITH_SERVER)
-
-#if !defined(HAVE_GETPEEREID) && !defined(SO_PEERCRED) && !defined(HAVE_STRUCT_CMSGCRED) && !defined(HAVE_STRUCT_FCRED) && !(defined(HAVE_STRUCT_SOCKCRED) && defined(LOCAL_CREDS))
-  if (num > 0) fputc ('\n', stdout);
-  fputs (_(" command socket authentication: use SetSocketPassword"), stdout); 
-  ++num;
-#else
-  if (num > 0) fputc ('\n', stdout);
-  fputs (_(" command socket authentication: use SetSocketAllowUID"), stdout); 
-  ++num;
-#endif
-
-#if defined(SH_USE_LIBWRAP)
-  if (num > 0) fputc ('\n', stdout);
-  fputs (_(" support tcp wrapper"), stdout); ++num;
-#endif
-#if defined(INET_SYSLOG)
-  if (num > 0) fputc ('\n', stdout);
-  fputs (_(" support listening on 514/udp (syslog)"), stdout); ++num;
+  fputs (_(" prelude (0.9+)"), stdout);
 #endif
 #endif
@@ -484,4 +386,124 @@
 }
 
+static void sh_getopt_print_options (void)
+{
+  int num = 0;
+
+
+#if defined(SH_STANDALONE)
+  if (num > 0) fputc ('\n', stdout);
+  fputs (_("Standalone executable"), stdout); ++num;
+#endif
+#if defined(SH_WITH_CLIENT)
+  if (num > 0) fputc ('\n', stdout);
+  printf (_("Client executable (port %d)"), SH_DEFAULT_PORT); ++num;
+#endif
+#if defined(SH_WITH_SERVER)
+  if (num > 0) fputc ('\n', stdout);
+  printf (_("Server executable (port %d, user %s)"), 
+	  SH_DEFAULT_PORT, DEFAULT_IDENT); 
+  ++num;
+#endif
+
+  fputs (_(", compiled-in options:"), stdout);
+
+#if defined(USE_DL_PREFIX)
+  if (num > 0) fputc ('\n', stdout);
+  printf (_(" using system malloc")); ++num;
+#else
+  if (num > 0) fputc ('\n', stdout);
+  printf (_(" using dnmalloc")); ++num;
+#endif
+
+#if defined(HAVE_EGD_RANDOM)
+  if (num > 0) fputc ('\n', stdout);
+  printf (_(" using entropy gathering daemon (%s)"), EGD_SOCKET_NAME); ++num;
+#endif
+#if defined(HAVE_UNIX_RANDOM)
+  if (num > 0) fputc ('\n', stdout);
+  fputs (_(" using unix entropy gatherer"), stdout); ++num;
+#endif
+#if defined(HAVE_URANDOM)
+  if (num > 0) fputc ('\n', stdout);
+  printf (_(" using entropy device (%s)"), NAME_OF_DEV_RANDOM); ++num;
+#endif
+
+#ifdef WITH_GPG
+  if (num > 0) fputc ('\n', stdout);
+  printf (_(" GnuPG signatures (%s)"), DEFAULT_GPG_PATH); ++num;
+#ifdef HAVE_GPG_CHECKSUM
+  if (num > 0) fputc ('\n', stdout);
+  printf (_("   -- GnuPG checksum:  %s"), GPG_HASH); ++num;
+#endif
+#ifdef USE_FINGERPRINT
+  if (num > 0) fputc ('\n', stdout);
+  printf (_("   -- Key fingerprint: %s"), SH_GPG_FP); ++num;
+#endif
+#endif
+
+#if defined(SL_DEBUG)
+  if (num > 0) fputc ('\n', stdout);
+  fputs (_(" debug build (do not use for production)"), stdout); ++num;
+#endif
+#if defined(SCREW_IT_UP)
+  if (num > 0) fputc ('\n', stdout);
+  fputs (_(" anti-debugger"), stdout); ++num;
+#endif
+#if defined(SH_USE_XML)
+  if (num > 0) fputc ('\n', stdout);
+  fputs (_(" xml log format"), stdout); ++num;
+#endif
+#if defined(HAVE_NTIME)
+  if (num > 0) fputc ('\n', stdout);
+  fputs (_(" using time server"), stdout); ++num;
+#endif
+
+#if defined(SH_WITH_CLIENT) || defined(SH_STANDALONE)
+#if defined(HAVE_LIBZ)
+  if (num > 0) fputc ('\n', stdout);
+  fputs (_(" optionally store full text for files"), stdout); ++num;
+#endif
+#if defined(USE_XATTR)
+  if (num > 0) fputc ('\n', stdout);
+  fputs (_(" check SELinux attributes"), stdout); ++num;
+#endif
+#if defined(USE_ACL)
+  if (num > 0) fputc ('\n', stdout);
+  fputs (_(" check Posix ACLs"), stdout); ++num;
+#endif
+#if defined(RELOAD_DATABASE)
+  if (num > 0) fputc ('\n', stdout);
+  fputs (_(" fetch database on reload"), stdout); ++num;
+#endif
+#endif
+
+#if defined(SH_WITH_SERVER)
+
+#if !defined(HAVE_GETPEEREID) && !defined(SO_PEERCRED) && !defined(HAVE_STRUCT_CMSGCRED) && !defined(HAVE_STRUCT_FCRED) && !(defined(HAVE_STRUCT_SOCKCRED) && defined(LOCAL_CREDS))
+  if (num > 0) fputc ('\n', stdout);
+  fputs (_(" command socket authentication: use SetSocketPassword"), stdout); 
+  ++num;
+#else
+  if (num > 0) fputc ('\n', stdout);
+  fputs (_(" command socket authentication: use SetSocketAllowUID"), stdout); 
+  ++num;
+#endif
+
+#if defined(SH_USE_LIBWRAP)
+  if (num > 0) fputc ('\n', stdout);
+  fputs (_(" support tcp wrapper"), stdout); ++num;
+#endif
+#if defined(INET_SYSLOG)
+  if (num > 0) fputc ('\n', stdout);
+  fputs (_(" support listening on 514/udp (syslog)"), stdout); ++num;
+#endif
+#endif
+
+  if (num == 0)
+    fputs (_(" none"), stdout);
+  fputc ('\n', stdout);
+  return;
+}
+
 static void sh_getopt_print_modules (void)
 {
@@ -489,5 +511,5 @@
   int num = 0;
   
-  fputs (_("Compiled-in modules:"), stdout);
+  fputs (_("Compiled-in modules:\n"), stdout);
 #ifdef SH_USE_UTMP
   if (num > 0) fputc (',', stdout);
Index: trunk/src/sh_hash.c
===================================================================
--- trunk/src/sh_hash.c	(revision 170)
+++ trunk/src/sh_hash.c	(revision 171)
@@ -1585,11 +1585,4 @@
       {
 	sl_strlcpy(fullpath, buf->fullpath, MAX_PATH_STORE+1);
-	/*
-	if (sl_strlen(buf->fullpath) < (MAX_PATH_STORE-3))
-	  {
-	    fullpath[MAX_PATH_STORE-2] = '\0';
-	    fullpath[MAX_PATH_STORE-1] = '\n';
-	  }
-	*/
       } 
     else 
Index: trunk/src/sh_mail.c
===================================================================
--- trunk/src/sh_mail.c	(revision 170)
+++ trunk/src/sh_mail.c	(revision 171)
@@ -1883,5 +1883,5 @@
   } querybuf;
 
-  querybuf reply;
+  querybuf * reply;
   char expanded[1024];
   unsigned char * comp_dn, * eom;
@@ -1896,7 +1896,9 @@
     SL_RETURN (NULL, _("get_mx"));
 
+  reply = SH_ALLOC(sizeof(querybuf));
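+  /* the DNS reply buffer is large, so it lives on the heap now;
+   * every early return below must SH_FREE it */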
+
   errno = 0;
   length = res_query (hostname, C_IN, T_MX, 
-		      (unsigned char *) &reply, 4095);
+		      (unsigned char *) reply, 4095);
   if (length < 1)
     {
@@ -1925,17 +1927,18 @@
 #endif
 	}
+      SH_FREE(reply);
       SL_RETURN (NULL, _("get_mx"));
     }
 
   ret = 0;
-  header  = (HEADER *) &reply;
+  header  = (HEADER *) reply;
 
   /* start of data section
    */
-  comp_dn = (unsigned char *) &reply + HFIXEDSZ;
+  comp_dn = (unsigned char *) reply + HFIXEDSZ;
 
   /* end-of-message
    */
-  eom     = (unsigned char *) &reply + length;
+  eom     = (unsigned char *) reply + length;
 
   /* HEADER NAME  -- must be skipped or decompressed
@@ -1958,13 +1961,23 @@
       comp_dn += ret + QFIXEDSZ;
       if (ret < 1 || comp_dn >= eom)
-	SL_RETURN (NULL, _("get_mx"));
+	{
+	  SH_FREE(reply);
+	  SL_RETURN (NULL, _("get_mx"));
+	}
     }
   count         = ntohs (header->ancount);
   if (count < 1)
-    SL_RETURN (NULL, _("get_mx"));
+    {
+      SH_FREE(reply);
+      SL_RETURN (NULL, _("get_mx"));
+    }
 
   retval        = SH_ALLOC (sizeof (dnsrep));
   if (!retval)
-    SL_RETURN (NULL, _("get_mx"));
+    {
+      SH_FREE(reply);
+      SL_RETURN (NULL, _("get_mx"));
+    }
+
   retval->count = count;
 
@@ -1973,4 +1986,5 @@
   if (!sl_ok_muls(count, sizeof (mx)))
     {
+      SH_FREE(reply);
       SH_FREE   (retval);
       SL_RETURN (NULL, _("get_mx"));
@@ -1981,4 +1995,5 @@
   if (!result)
     {
+      SH_FREE(reply);
       SH_FREE   (retval);
       SL_RETURN (NULL, _("get_mx"));
@@ -1995,4 +2010,5 @@
       if (ret < 1 || comp_dn >= eom)
 	{
+	  SH_FREE(reply);
 	  SH_FREE (result);
 	  SH_FREE (retval);
@@ -2006,4 +2022,5 @@
       if (type != T_MX || comp_dn >= eom)
 	{
+	  SH_FREE(reply);
 	  SH_FREE (result);
 	  SH_FREE (retval);
@@ -2017,4 +2034,5 @@
       if (comp_dn >= eom)
 	{
+	  SH_FREE(reply);
 	  SH_FREE (result);
 	  SH_FREE (retval);
@@ -2027,4 +2045,5 @@
       if (comp_dn >= eom)
 	{
+	  SH_FREE(reply);
 	  SH_FREE (result);
 	  SH_FREE (retval);
@@ -2038,4 +2057,5 @@
       if (rdlength < 1 || comp_dn >= eom)
 	{
+	  SH_FREE(reply);
 	  SH_FREE (result);
 	  SH_FREE (retval);
@@ -2049,4 +2069,5 @@
       if (comp_dn >= eom)
 	{
+	  SH_FREE(reply);
 	  SH_FREE (result);
 	  SH_FREE (retval);
@@ -2059,4 +2080,5 @@
       if (ret < 1)
 	{
+	  SH_FREE(reply);
 	  SH_FREE (result);
 	  SH_FREE (retval);
@@ -2074,4 +2096,5 @@
   while (ret > 0 && comp_dn < eom && count);
 
+  SH_FREE(reply);
   SL_RETURN (retval, _("get_mx"));
 }
Index: trunk/src/sh_portcheck.c
===================================================================
--- trunk/src/sh_portcheck.c	(revision 170)
+++ trunk/src/sh_portcheck.c	(revision 171)
@@ -687,5 +687,5 @@
   do {
     retval = connect(fd, (struct sockaddr *) &sinr, sizeof(sinr));
-  } while (retval < 0 && errno == EINTR);
+  } while (retval < 0 && (errno == EINTR || errno == EINPROGRESS));
 
   if (retval == -1)
@@ -775,5 +775,5 @@
   do {
     retval = connect(fd, (struct sockaddr *) &sinr, sizeof(sinr));
-  } while (retval < 0 && errno == EINTR);
+  } while (retval < 0 && (errno == EINTR || errno == EINPROGRESS));
 
   if (retval == -1 && errno == ECONNREFUSED)
Index: trunk/src/sh_string.c
===================================================================
--- trunk/src/sh_string.c	(revision 170)
+++ trunk/src/sh_string.c	(revision 171)
@@ -4,4 +4,5 @@
 #include <string.h>
 #include <stdio.h>
+#include <sys/types.h>
 
 #include "sh_string.h"
@@ -40,5 +41,5 @@
       /* skip leading WS 
        */
-      for (s=e; *s && isspace(*s); ++s) /* nothing */;
+      for (s=e; *s && isspace((unsigned char)*s); ++s) /* nothing */;
 
       if (*s) 
@@ -65,12 +66,28 @@
           if (a != line)
             {
-              /* chop off trailing WS 
-               */
-              for (a--; isspace(*a) && a > s; a--) /* do nothing */;
-              
-              /* terminate string
-               */
-              ++a; *a = '\0';
-            }
+              if (i < (maxfields -1))
+                {
+                  /* chop off trailing WS
+                   */
+                  for (a--; isspace((unsigned char)*a) && a > s; a--) /* do nothing */;
+
+                  /* terminate string
+                   */
+                  ++a; *a = '\0';
+                }
+              else
+                {
+                  /* If nfields < actual fields, the last string
+                   * is the remainder, so skip to its end.
+                   */
+                  if ( *a )
+                    {
+                      do {
+                        a++;
+                      } while ( *a );
+                    }
+                }
+            }
           else
             {
@@ -127,9 +144,9 @@
       /* skip leading WS 
        */
-      if ( *s && isspace(*s) )
+      if ( *s && isspace((unsigned char)*s) )
         {
           do {
             ++s;
-          } while ( *s && isspace(*s) );
+          } while ( *s && isspace((unsigned char)*s) );
         }
 
@@ -142,5 +159,5 @@
           do {
             a++;
-          } while ( *a && (!isspace(*a)) );
+          } while ( *a && (!isspace((unsigned char)*a)) );
 
           /* next token, *a is either ws or '\0' 
@@ -152,5 +169,5 @@
           if (i < (maxfields-1))
 	    {
-              *a = '\0'; 
+              *a = '\0';
 	    }
 	  else
@@ -275,4 +292,5 @@
     }
   memcpy(s->str, str, (len+1));
+  s->len = len;
   return s;
 }
@@ -397,4 +415,5 @@
   sh_string * r = NULL;
   char * p;
+  long   tlen;
   size_t len;
   int    end    = 0;
@@ -403,5 +422,5 @@
   size_t newlen = 0;
   long   diff;
-  int    i;
+  int    i, curr, last;
 
 
@@ -443,5 +462,18 @@
     }
 
-  if (r && ovecnum > 0)
+
+  curr = -1;
+  last = -1;
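+  /* locate the first valid match; unused ovector pairs are marked
+   * with -1 (PCRE convention), start offsets sit at even indices */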
+
+  for (i = 0; i < ovecnum; ++i)
+    {
+      if (ovector[2*i] >= 0)
+        {
+          curr = 2*i;
+          break;
+        }
+    }
+  
+  if (r && ovecnum > 0 && curr >= 0)
     {
       r->len = 0; r->str[0] = '\0'; p = r->str;
@@ -449,29 +481,57 @@
       /* First part, until start of first replacement 
        */
-      memcpy(p, s->str,      ovector[0]); p += ovector[0];
-      memcpy(p, replacement, rlen);       p += rlen;
-      *p = '\0'; r->len += (ovector[0] + rlen);
+      memcpy(p, s->str, (size_t)ovector[curr]); p += ovector[curr];
+      memcpy(p, replacement,    rlen);       p += rlen;
+      *p = '\0'; r->len += (ovector[curr] + rlen);
+
+      last = curr + 1;
 
       for (i = 1; i < ovecnum; ++i)
         {
+          if (ovector[2*i] < 0)
+            continue;
+
+          curr = 2*i;
+
           /* From end of last replacement to start of this */
-          len = ovector[2*i] - ovector[2*i -1];
-          memcpy(p, &(s->str[ovector[2*i -1]]), len);
-          p += len;
-
-          /* The replacement */
-          memcpy(p, replacement, rlen);       
-          p += rlen;
-
-          /* null terminate */
-          *p = '\0'; r->len += (len + rlen);
-        }
+          tlen = (long)(ovector[curr] - ovector[last]);
+          if (tlen >= 0)
+            {
+              len = (size_t) tlen;
+
+              if (tlen > 0)
+                {
+                  memcpy(p, &(s->str[ovector[last]]), (size_t)len);
+                  p += len;
+                }
+              
+              /* The replacement */
+              memcpy(p, replacement, rlen);       
+              p += rlen;
+              
+              /* null terminate */
+              *p = '\0'; r->len += (len + rlen);
+
+              last = curr + 1;
+            }
+	}
 
       /* Last part, after last replacement; includes terminating null 
        */
-      len = (s->len + 1) - ovector[2*i -1];
-      memcpy(p, &(s->str[ovector[2*i -1]]), len);
-      p += len; *p = '\0'; r->len += (len - 1);
-    }
+      if (last > 0)
+        {
+          /* if last == 0, nothing has been replaced and r is still
+           * a plain copy of s */
+          tlen = (long)((s->len + 1) - ovector[last]);
+          if (tlen > 0)
+            {
+              len = (size_t)tlen;
+              memcpy(p, &(s->str[ovector[last]]), len);
+              p += len; *p = '\0'; r->len += (len - 1);
+            }
+        }
+
+    }
+
   return r;
 }
@@ -785,4 +845,5 @@
   t = sh_string_replace(s, ovector, ovecnum, 
                         "___", 3);
+  CuAssertPtrNotNull(tc, t);
   CuAssertStrEquals(tc, "___c ___ ",   t->str);
   CuAssertIntEquals(tc, 9, (int)t->len);
@@ -792,4 +853,5 @@
   t = sh_string_replace(s, ovector, ovecnum, 
                         "___", 3);
+  CuAssertPtrNotNull(tc, t);
   CuAssertStrEquals(tc, "___c ___",   t->str);
   CuAssertIntEquals(tc, 8, (int)t->len);
@@ -806,4 +868,5 @@
                         "___", 3);
   
+  CuAssertPtrNotNull(tc, t);
   CuAssertStrEquals(tc, "______f ghi ",   t->str);
   CuAssertIntEquals(tc, 12, (int)t->len);
@@ -813,4 +876,5 @@
   t = sh_string_replace(s, ovector, ovecnum, 
                         "___", 3);
+  CuAssertPtrNotNull(tc, t);
   CuAssertStrEquals(tc, "abc ___ef ghi___",   t->str);
   CuAssertIntEquals(tc, 16, (int)t->len);
@@ -818,4 +882,5 @@
   t = sh_string_replace(s, ovector, 0, 
                         "___", 3);
+  CuAssertPtrNotNull(tc, t);
   CuAssertStrEquals(tc, s->str,   t->str);
   CuAssertIntEquals(tc, (int)s->len, (int)t->len);
Index: trunk/src/sh_tiger0.c
===================================================================
--- trunk/src/sh_tiger0.c	(revision 170)
+++ trunk/src/sh_tiger0.c	(revision 171)
@@ -843,5 +843,5 @@
   static const int BLOCKSIZE = 8192;
   struct md5_ctx ctx;
-  char buffer[8264]; /* BLOCKSIZE + 72  AIX compiler chokes */
+  char * buffer = SH_ALLOC(8264); /* BLOCKSIZE + 72  AIX compiler chokes */
   size_t sum;
 
@@ -870,4 +870,5 @@
       SH_FREE(tmp);
       *Length = 0;
+      SH_FREE(buffer);
       return -1;
     }
@@ -878,5 +879,5 @@
 
   /* Iterate over full file contents.  */
-  while (1 == 1) {
+  while (1) {
     /* We read the file in blocks of BLOCKSIZE bytes.  One call of the
        computation function processes the whole buffer so that with the
@@ -894,5 +895,8 @@
 	{
 	  if (sig_termfast == 1)
-	    return -1;
+	    {
+	      SH_FREE(buffer);
+	      return -1;
+	    }
 	  TPT((0, FIL__ , __LINE__ , _("msg=<SL_ISERROR (%ld)>\n"), n));
 	  tmp = sh_util_safe_name (filename);
@@ -909,4 +913,5 @@
 	  SH_FREE(tmp);
 	  *Length = 0;
+	  SH_FREE(buffer);
 	  return -1;
 	}
@@ -953,4 +958,5 @@
       {
 	*Length = 0;
+	SH_FREE(buffer);
 	return -1;
       }
@@ -969,4 +975,5 @@
 
   *Length = bcount;
+  SH_FREE(buffer);
   return 0;
 }
@@ -1367,5 +1374,5 @@
   static const int BLOCKSIZE = 4096;
   struct sha_ctx ctx;
-  char buffer[4168]; /* BLOCKSIZE + 72 AIX compiler chokes */
+  char * buffer = SH_ALLOC(4168); /* BLOCKSIZE + 72 AIX compiler chokes */
   off_t sum = 0;
   char * tmp;
@@ -1393,4 +1400,5 @@
       SH_FREE(tmp);
       *Length = 0;
+      SH_FREE(buffer);
       return -1;
     }
@@ -1417,5 +1425,8 @@
 	{
 	  if (sig_termfast == 1)
-	    return -1;
+	    {
+	      SH_FREE(buffer);
+	      return -1;
+	    }
 
 	  TPT((0, FIL__ , __LINE__ , _("msg=<SL_ISERROR (%ld)>\n"), n));
@@ -1435,4 +1446,5 @@
 	  SH_FREE(tmp);
 	  *Length = 0;
+	  SH_FREE(buffer);
 	  return -1;
 	}
@@ -1479,4 +1491,5 @@
       {
 	*Length = 0;
+	SH_FREE(buffer);
 	return -1;
       }
@@ -1497,4 +1510,5 @@
   sha_digest (&ctx, resblock);
   *Length = bcount;
+  SH_FREE(buffer);
   return 0;
 }
@@ -1505,5 +1519,5 @@
 				   char * out, size_t len)
 {
-  int cnt = (int) Length;  /* fix compiler warning */
+  int cnt;
   char outbuf[KEY_LEN+1];
   unsigned char sha1buffer[20];
Index: trunk/src/sh_tiger1_64.c
===================================================================
--- trunk/src/sh_tiger1_64.c	(revision 170)
+++ trunk/src/sh_tiger1_64.c	(revision 171)
@@ -371,10 +371,10 @@
 }
 
-void tiger_compress(word64 *str, word64 state[3])
+void tiger_compress(const word64 *str, word64 state[3])
 {
   tiger_compress_macro(((word64*)str), ((word64*)state));
 }
 
-void tiger_t(word64 *str, word64 length, word64 res[3])
+void tiger_t(const word64 *str, word64 length, word64 res[3])
 {
   register word64 i;
Index: trunk/src/sh_tools.c
===================================================================
--- trunk/src/sh_tools.c	(revision 170)
+++ trunk/src/sh_tools.c	(revision 171)
@@ -725,7 +725,5 @@
   struct  sigaction  new_act;
   struct  sigaction  old_act;
-#if defined(WITH_TPT) 
   char    errbuf[SH_ERRBUF_SIZE];
-#endif
 
   SL_ENTER(_("sh_write_select"));
@@ -762,10 +760,13 @@
 		continue;
 	      }
-	    if ( errno == EINTR) /* try again */
+	    if ( errno == EINTR || errno == EINPROGRESS ) /* try again */
 	      continue;
 	    *w_error = errno;
-	    TPT(( 0, FIL__, __LINE__, _("msg=<select: %s>\n"), 
-		  sh_error_message(*w_error, errbuf, sizeof(errbuf))));
 	    sigaction (SIGPIPE, &old_act, NULL);
+	    sh_error_message(*w_error, errbuf, sizeof(errbuf));
+	    sh_error_handle (SH_ERR_INFO, FIL__, __LINE__, errno, MSG_E_SUBGEN,
+			     errbuf,
+			     _("sh_write_select (ws)") ); 
+	    TPT(( 0, FIL__, __LINE__, _("msg=<select: %s>\n"), errbuf ));
 	    SL_RETURN( countbytes, _("sh_write_select"));
 	  }
@@ -780,10 +781,13 @@
 		continue;
 	      }
-	    if ( errno == EINTR ) /* try again */
+	    if ( errno == EINTR || errno == EINPROGRESS ) /* try again */
 	      continue;
 	    *w_error = errno;
-	    TPT(( 0, FIL__, __LINE__, _("msg=<select: %s>\n"), 
-		  sh_error_message(*w_error, errbuf, sizeof(errbuf))));
 	    sigaction (SIGPIPE, &old_act, NULL);
+	    sh_error_message(*w_error, errbuf, sizeof(errbuf));
+	    sh_error_handle (SH_ERR_INFO, FIL__, __LINE__, errno, MSG_E_SUBGEN,
+			     errbuf,
+			     _("sh_write_select (rs)") ); 
+	    TPT(( 0, FIL__, __LINE__, _("msg=<select: %s>\n"), errbuf ));
 	    SL_RETURN( countbytes, _("sh_write_select"));
 	  }
@@ -836,4 +840,9 @@
 	    *w_error = errno;
 	    sigaction (SIGPIPE, &old_act, NULL);
+	    sh_error_message(*w_error, errbuf, sizeof(errbuf));
+	    sh_error_handle (SH_ERR_INFO, FIL__, __LINE__, errno, MSG_E_SUBGEN,
+			     errbuf,
+			     (type == SH_DO_WRITE) ? 
+			     _("sh_write_select (w)") : _("sh_write_select (r)")); 
 	    TPT(( 0, FIL__, __LINE__, _("msg=<count < 0>\n")));
 	    SL_RETURN( countbytes, _("sh_write_select"));
Index: trunk/src/sh_unix.c
===================================================================
--- trunk/src/sh_unix.c	(revision 170)
+++ trunk/src/sh_unix.c	(revision 171)
@@ -3773,6 +3773,15 @@
 	{
 #ifdef HAVE_LIBZ
-	  unsigned long   clen = compressBound(sh_string_len(content));
-	  unsigned char * compressed = SH_ALLOC(clen);
+	  unsigned long   clen;
+	  unsigned char * compressed;
+#ifdef HAVE_COMPRESSBOUND
+	  clen       = compressBound(sh_string_len(content));
+#else
+	  /* fallback worst-case bound: zlib wants the destination at
+	   * least 0.1% larger than the source, plus 12 bytes */
+	  if (sh_string_len(content) > 10*SH_TXT_MAX)
+	    clen = SH_TXT_MAX;
+	  else
+	    clen = 13 + (unsigned long)(1.001*sh_string_len(content));
+#endif
+	  compressed = SH_ALLOC(clen);
 	  if (Z_OK == compress(compressed, &clen, 
 			       (unsigned char *) sh_string_str(content), 
Index: trunk/src/trustfile.c
===================================================================
--- trunk/src/trustfile.c	(revision 170)
+++ trunk/src/trustfile.c	(revision 171)
@@ -782,4 +782,5 @@
 	  register int i;		/* trustworthy or not?       */
 	  const char * t_const;
+	  char *end;
 
 	  /*
@@ -810,4 +811,8 @@
 	  if (csym[0] != '/')
 	    {
+	      /* pointer one past the last element of full[]
+	       */
+	      end = &full[MAXFILENAME-1]; ++end;
+
 	      /* initialize pointers 
 	       */
@@ -817,5 +822,5 @@
 	       */
 	      t = fexp;
-	      while(*t && b < &full[MAXFILENAME])
+	      while(*t && b < end)
 		*b++ = *t++;
 
@@ -823,5 +828,5 @@
 	       */
 	      t_const = "/../";
-	      while(*t && b < &full[MAXFILENAME])
+	      while(*t_const && b < end)
 		*b++ = *t_const++;
 
@@ -829,10 +834,10 @@
 	       */
 	      t = csym;
-	      while(*t && b < &full[MAXFILENAME])
+	      while(*t && b < end)
 		*b++ = *t++;
 
 	      /* see if we're too big 
 	       */
-	      if (*t || b == &full[MAXFILENAME])
+	      if (*t || b == end)
 		{
 		  /* yes -- error 
