Array vs Binary Search Tree

An interesting comparison can be made between an array and a binary search tree. Both are very fast.

The array uses less memory and takes less time to add data to when it is not sorted. Arrays add data at a constant time rate or O(1) in Big O notation or at a linear rate, O(n), when sorted. Sorting a wholey unsorted array is done in O(n•log(n)) time using the quicksort algorithm. Sorting using this algorithm along with the time to add the data unsorted makes the array faster to add data to. Searching an array is done with a binary search.

The link-based binary search tree must allocate memory for its nodes slowing it down, but searches a lot faster (on a percentage basis) than the array. The search speed should be identical, O(log(n)) (best case), however it just does not work out this way. Adding a single item to a large data structure is faster with a binary tree, O(log(n)) assuming it is well balanced.

The array-based binary search tree is about 25% faster than the link-based one due to it not having the need to allocate memory on a per node basis. The entire array is allocated all at once. The array does not grow and this is its achilles heel.

Again, the search results should be the same, however, the binary search tree is faster for both the link-based and array-based binary search tree.

It is important to note that this is a well balanced binary search tree, sorting random data. This makes it faster but at best it should only be as fast as a binary search. There is code on this page to balance a binary search tree to increase its performance even more. A perfectly balanced tree should be just as fast as a binary search on a sorted array.

Notice that the code for the array has been optimized for the int data type.

The spaghetti code for this program is as follows.

#include <iostream>
#include <string>
#include <stdlib.h>
#include <ctime>
using namespace std;

class TRandomMersenne {				// encapsulate random number generator
										 // or constants for MT19937:
#define MERS_N	 624
#define MERS_M	 397
#define MERS_R	 31
#define MERS_U	 11
#define MERS_S	 7
#define MERS_T	 15
#define MERS_L	 18
#define MERS_A	 0x9908B0DF
#define MERS_B	 0x9D2C5680
#define MERS_C	 0xEFC60000

public:
	TRandomMersenne(unsigned long seed) {		 // constructor
		RandomInit(seed);
	}
	void RandomInit(unsigned long seed);		// re-seed
	void RandomInitByArray(unsigned long seeds[], int length); // seed by more than 32 bits
	int IRandom(int min, int max);		 // output random integer
	double Random();					 // output random float
	unsigned long BRandom();					// output random bits
private:
	unsigned long mt[MERS_N];					 // state vector
	int mti;							 // index into mt
	enum TArch { LITTLEENDIAN, BIGENDIAN, NONIEEE };
	TArch Architecture;					// conversion to float depends on computer architecture
};
void TRandomMersenne::RandomInit(unsigned long seed) {
	// re-seed generator
	mt[0] = seed;
	for (mti = 1; mti < MERS_N; mti++) {
		mt[mti] = (1812433253UL * (mt[mti - 1] ^ (mt[mti - 1] >> 30)) + mti);
	}

	// detect computer architecture
	union { double f; unsigned long i[2]; } convert;
	convert.f = 1.0;
	// Note: Old versions of the Gnu g++ compiler may make an error here,
	// compile with the option	-fenum-int-equiv	to fix the problem
	if (convert.i[1] == 0x3FF00000) Architecture = LITTLEENDIAN;
	else if (convert.i[0] == 0x3FF00000) Architecture = BIGENDIAN;
	else Architecture = NONIEEE;
}

void TRandomMersenne::RandomInitByArray(unsigned long seeds[], int length) {
	// seed by more than 32 bits
	int i, j, k;
	RandomInit(19650218UL);
	if (length <= 0) return;
	i = 1;	j = 0;
	k = (MERS_N > length ? MERS_N : length);
	for (; k; k--) {
		mt[i] = (mt[i] ^ ((mt[i - 1] ^ (mt[i - 1] >> 30)) * 1664525UL)) + seeds[j] + j;
		i++; j++;
		if (i >= MERS_N) { mt[0] = mt[MERS_N - 1]; i = 1; }
		if (j >= length) j = 0;
	}
	for (k = MERS_N - 1; k; k--) {
		mt[i] = (mt[i] ^ ((mt[i - 1] ^ (mt[i - 1] >> 30)) * 1566083941UL)) - i;
		if (++i >= MERS_N) { mt[0] = mt[MERS_N - 1]; i = 1; }
	}
	mt[0] = 0x80000000UL;
} // MSB is 1; assuring non-zero initial array

unsigned long TRandomMersenne::BRandom() {
	// generate 32 random bits
	unsigned long y;

	if (mti >= MERS_N) {
		// generate MERS_N words at one time
		const unsigned long LOWER_MASK = (1LU << MERS_R) - 1; // lower MERS_R bits
		const unsigned long UPPER_MASK = -1L << MERS_R;		// upper (32 - MERS_R) bits
		static const unsigned long mag01[2] = { 0, MERS_A };

		int kk;
		for (kk = 0; kk < MERS_N - MERS_M; kk++) {
			y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK);
			mt[kk] = mt[kk + MERS_M] ^ (y >> 1) ^ mag01[y & 1];
		}

		for (; kk < MERS_N - 1; kk++) {
			y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK);
			mt[kk] = mt[kk + (MERS_M - MERS_N)] ^ (y >> 1) ^ mag01[y & 1];
		}

		y = (mt[MERS_N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK);
		mt[MERS_N - 1] = mt[MERS_M - 1] ^ (y >> 1) ^ mag01[y & 1];
		mti = 0;
	}

	y = mt[mti++];

	// Tempering (May be omitted):
	y ^= y >> MERS_U;
	y ^= (y << MERS_S) & MERS_B;
	y ^= (y << MERS_T) & MERS_C;
	y ^= y >> MERS_L;
	return y;
}

double TRandomMersenne::Random() {
	// output random float number in the interval 0 <= x < 1
	union { double f; unsigned long i[2]; } convert;
	unsigned long r = BRandom(); // get 32 random bits
	switch (Architecture) {
	case LITTLEENDIAN:
		convert.i[0] = r << 20;
		convert.i[1] = (r >> 12) | 0x3FF00000;
		return convert.f - 1.0;
	case BIGENDIAN:
		convert.i[1] = r << 20;
		convert.i[0] = (r >> 12) | 0x3FF00000;
		return convert.f - 1.0;
	case NONIEEE: default:
		;
	}
	// This somewhat slower method works for all architectures, including 
	// non-IEEE floating point representation:
	return (double)r * (1. / ((double)(unsigned long)(-1L) + 1.));
}

int TRandomMersenne::IRandom(int min, int max) {
	// output random integer in the interval min <= x <= max
	int r;
	r = int((max - min + 1) * Random()) + min; // multiply interval with random and truncate
	if (r > max) r = max;
	if (max < min) return 0x80000000;
	return r;
}






template <class TData> class ArrayNode
{
public:
	TData data;
	int ileft, iright;
	ArrayNode() : ileft(-1), iright(-1) {}
	~ArrayNode()
	{
		ileft = iright = -1;
	}
};





template <class TData> class BinaryArrayTree
{
private:
	ArrayNode<TData> *arraytree;
	int idx;
	BinaryArrayTree() {};

public:
	BinaryArrayTree(const unsigned int arraySize);
	~BinaryArrayTree();
	void Add(const TData &data);
	bool Search(const TData &data);
};

template <class TData> void BinaryArrayTree<TData>::Add(const TData &data)
{
	if(idx)
	{
		int *p;
		if (data == arraytree[0].data)
			return;
		if (data < arraytree[0].data)
			p = &arraytree[0].ileft;
		else
			p = &arraytree[0].iright;
		while (*p > -1)
		{
			if (data == arraytree[*p].data)
				return;
			if (data < arraytree[*p].data)
				p = &arraytree[*p].ileft;
			else
				p = &arraytree[*p].iright;
		}
		*p = idx;
		arraytree[idx++].data = data;
	}
	else
		arraytree[idx++].data = data;
}

template <class TData> bool BinaryArrayTree<TData>::Search(const TData &data)
{
	if (idx)
	{
		int i = 0;
		while (i > -1)
		{
			if (data == arraytree[i].data)
				return true;
			if (data < arraytree[i].data)
				i = arraytree[i].ileft;
			else
				i = arraytree[i].iright;
		}
	}
	return false;
}

template <class TData> BinaryArrayTree<TData>::BinaryArrayTree(const unsigned int arraySize) : arraytree(new ArrayNode<TData>[arraySize]), idx(0)
{
}

template <class TData> BinaryArrayTree<TData>::~BinaryArrayTree()
{
	if (arraytree)
		delete[]arraytree;
	arraytree = nullptr;
}






template <class TData> class LinkedNode
{
private:
	LinkedNode() {}
public:
	TData data;
	LinkedNode<TData> *pleft, *pright;
	LinkedNode(TData &dataToSort) : pleft(nullptr), pright(nullptr), data(dataToSort) {}
	~LinkedNode()
	{
		if (pleft)
		{
			delete pleft;
			pleft = nullptr;
		}
		if (pright)
		{
			delete pright;
			pright = nullptr;
		}
	}
};





template <class TData> class BinaryLinkedTree
{
private:
	LinkedNode<TData> *ptree;
	void Add(TData &data, LinkedNode<TData> **ppnode);
	bool Search(const TData &data, LinkedNode<TData> **ppnode);

public:
	BinaryLinkedTree();
	~BinaryLinkedTree();
	void Add(TData &data);
	bool Search(const TData &data);
};

template <class TData> void BinaryLinkedTree<TData>::Add(TData &data, LinkedNode<TData> **ppnode)
{
	if ((*ppnode) == nullptr)
		*ppnode = new LinkedNode<TData>(data);
	else
	{
		if (data == (*ppnode)->data)
			return;
		if (data < (*ppnode)->data)
			Add(data, &(*ppnode)->pleft);
		else
			Add(data, &(*ppnode)->pright);
	}
}

template <class TData> void BinaryLinkedTree<TData>::Add(TData &data)
{
	Add(data, &ptree);
}

template <class TData> bool BinaryLinkedTree<TData>::Search(const TData &data, LinkedNode<TData> **ppnode)
{
	if ((*ppnode) != nullptr)
	{
		if (data < (*ppnode)->data)
			return Search(data, &(*ppnode)->pleft);
		if (data >(*ppnode)->data)
			return Search(data, &(*ppnode)->pright);
		return true;
	}
	return false;
}

template <class TData> bool BinaryLinkedTree<TData>::Search(const TData &data)
{
	return Search(data, &ptree);
}

template <class TData> BinaryLinkedTree<TData>::BinaryLinkedTree() : ptree(nullptr)
{
}

template <class TData> BinaryLinkedTree<TData>::~BinaryLinkedTree()
{
	if (ptree != nullptr)
		delete ptree;
	ptree = nullptr;
}





#define DEFAULT_SIZE 1000

template <class TData> class Array
{
private:
	TData *items;
	unsigned int length, max;
	bool sorted;
	void Sort(TData * const pparray, const int size) const;

public:
	Array();
	Array(unsigned int arrayLength);
	~Array();
	unsigned int Length() const;
	void Add(const TData data);
	TData operator[](const unsigned int index);
	bool Remove(const TData &data);
	void Clear();
	unsigned int GetIndexOf(const TData data); // returns Length() when data not found
};

template <class TData> void Array<TData>::Sort(TData * const pparray, const int size) const // quicksort
{
	if (size < 2)
		return;
	TData pivot = pparray[size / 2];
	TData *left = pparray;
	TData *right = pparray + size - 1;
	while (true) {
		while ((left <= right) && *left < pivot) left++;
		while ((left <= right) && *right > pivot) right--;
		if (left > right) break;
		TData temp = *left;
		*left++ = *right;
		*right-- = temp;
	}
	Sort(pparray, right - pparray + 1);
	Sort(left, pparray + size - left);
}

template <class TData> Array<TData>::Array() : max(DEFAULT_SIZE), length(0), sorted(false)
{
	items = new TData[max];
	for (int i = 0; i < max; items[i++] = nullptr);
}

template <class TData> Array<TData>::Array(unsigned int arrayLength) : max(arrayLength), length(0), sorted(false)
{
	if (max == 0)
		max = DEFAULT_SIZE;
	items = new TData[max];
	for (int i = 0; i < max; items[i++] = 0);
}

template <class TData> Array<TData>::~Array()
{
	Clear();
	delete[]items;
	items = nullptr; // clean up the memory
}

template <class TData> unsigned int Array<TData>::Length() const
{
	return length;
}

template <class TData> void Array<TData>::Add(const TData data)
{
	if (length == max) // then increase size of array
	{
		max *= 2;
		TData *newArray = new TData[max];
		int i;
		for (i = 0; i < length; newArray[i++] = items[i]);
		for (; i < max; newArray[i++] = 0);
		for (i = 0; i < length; items[i++] = 0);
		delete[]items;
		items = newArray;
	}
	items[length++] = data;
	if (sorted)
	{
		TData temp;
		unsigned int count;
		for (count = length - 1; count > 0; count--) // reversed bubblesort with break statement
		{
			if (items[count] < items[count - 1])
			{
				temp = items[count];
				items[count] = items[count - 1];
				items[count - 1] = temp;
			}
			else
				break;
		}
	}
}

template <class TData> TData Array<TData>::operator[](const unsigned int index)
{
	// make sure to use Length() to know the size of the array so as not to exceed the bounds of the array of items
	return items[index];
}

template <class TData> bool Array<TData>::Remove(const TData &data)
{
	unsigned int idx = GetIndexOf(data);
	if (idx < length)
	{
		delete items[idx];
		length--;
		for (int i = idx; i < length; i++)
			items[i] = items[i + 1];
		items[length] = 0; // clean up the memory
		return true;
	}
	return false;
}

template <class TData> unsigned int Array<TData>::GetIndexOf(const TData data)
{
	// this procedure uses binary search so array must be sorted
	if (!sorted)
	{
		Sort(items, length);
		sorted = true;
	}
	int first = 0, last = length - 1, middle;
	while (first <= last)
	{
		middle = (first + last) / 2;
		if (items[middle] == data)
			return middle;
		if (data > items[middle])
			first = middle + 1;
		else
			last = middle - 1;
	}
	return length;
}

template <class TData> void Array<TData>::Clear()
{
	for (int i = 0; i < length; i++)
		items[i] = 0;
	length = 0;
}

#define ITEMS 10000000

int main()
{
	BinaryLinkedTree<int> blt;
	BinaryArrayTree<int> bat(ITEMS);
	Array<int> a(ITEMS);
	int *temp = new int[ITEMS];

	int start, tim, i;
	TRandomMersenne randm(time(0));

	for (i = 0; i < ITEMS; i++)
		temp[i] = randm.IRandom(-ITEMS, ITEMS);

	start = clock();
	for (i = 0; i < ITEMS; i++)
		blt.Add(temp[i]);
	tim = (clock() - start);
	cout << fixed << ((float)tim / 1000.0) << " seconds to add " << ITEMS << " items to linked-based binary search tree" << endl << "Space used on 32-bit system: " << ((4 * 3) * ITEMS) << " bytes" << endl << endl;

	start = clock();
	for (i = 0; i < ITEMS; i++)
		bat.Add(temp[i]);
	tim = (clock() - start);
	cout << fixed << ((float)tim / 1000.0) << " seconds to add " << ITEMS << " items to array-based binary search tree" << endl << "Space used on 32-bit system: " << ((4 * 3) * ITEMS) << " bytes" << endl << endl;

	start = clock();
	for (i = 0; i < ITEMS; i++)
		a.Add(temp[i]);
	a.GetIndexOf(temp[0]); // this will force a sort of the array
	tim = (clock() - start);
	cout << fixed << ((float)tim / 1000.0) << " seconds to add " << ITEMS << " items to array" << endl << "Space used on 32-bit system: " << (4 * ITEMS) << " bytes" << endl << endl;

	start = clock();
	for (i = 0; i < ITEMS; blt.Search(i++));
	tim = (clock() - start);
	cout << fixed << ((float)tim / 1000.0) << " seconds to search linked-based binary search tree " << ITEMS << " times" << endl << endl;

	start = clock();
	for (i = 0; i < ITEMS; bat.Search(i++));
	tim = (clock() - start);
	cout << fixed << ((float)tim / 1000.0) << " seconds to search array-based binary search tree " << ITEMS << " times" << endl << endl;

	start = clock();
	for (i = 0; i < ITEMS; a.GetIndexOf(i++));
	tim = (clock() - start);
	cout << fixed << ((float)tim / 1000.0) << " seconds to search array using binary search " << ITEMS << " times" << endl << endl;

	delete[]temp;
	return 0;
}