Working on priority queue and improving Dictionary notes

Clément · Clément · commit 0a03ecbfc2c1 · 2025-10-23T14:21:04.000-04:00
diff --git a/source/code/projects/Dictionary/Dictionary/Dictionary.cs b/source/code/projects/Dictionary/Dictionary/Dictionary.cs
@@ -105,7 +105,10 @@ public int GetIndex(TKey keyP, int countP)
             Math.Abs(keyP.GetHashCode())
             + CollisionResolution(keyP, countP)
           ) % table.Length;
+
     }
+    // GetIndex is public for demonstration purposes, 
+    // but should really be private.
 
     // This is how collisions are handled.
     // It depends on the strategy picked (Strategy),
@@ -123,16 +126,27 @@ private int CollisionResolution(TKey keyP, int countP)
                 return countP * countP;
             else if (Strategy == PSSType.Double)
                 // This is double hashing.
-                return countP * (31 - (keyP.GetHashCode() % 31));
-            // countP * hash2(keyP) where hash2 is 31 - (key % 31) and will always be > 0
+                return countP * GetHash2(keyP);
             else
+                // This is needed to compile:
+                // even if we know that those are
+                // the only values in the PSSType
+                // enumerated datatype, C# will
+                // complain that not all code paths
+                // return a value otherwise.
                 throw new ApplicationException(
                   "Unknown collision startegy."
                 );
         }
     }
     // Done with GetIndex and CollisionResolution.
 
+    // Secondary hash function
+    private int GetHash2(TKey key)
+    {
+        int step = table.Length - (key.GetHashCode() % table.Length); // never 0, even when the hash code is negative
+        return step % table.Length == 0 ? 1 : step; // a multiple of table.Length would probe the same slot forever
+    }
     // Adding an element
     public void Add(TKey keyP, TValue valueP)
     {
diff --git a/source/code/projects/Dictionary/Dictionary/Program.cs b/source/code/projects/Dictionary/Dictionary/Program.cs
@@ -25,7 +25,7 @@ static void Main(string[] args)
         CDictionary<string, Address> notebook = new CDictionary<
           string,
           Address
-        >(13, CDictionary<string, Address>.PSSType.Linear);
+        >(13, CDictionary<string, Address>.PSSType.Quad);
         // Key of type string, value of type Address, Linear probe sequence strategy.
         // Try with
         // CDictionary<string, Address>.PSSType.Linear
@@ -90,5 +90,49 @@ static void Main(string[] args)
         {
             Console.Write((i * i) % 13 + ", ");
         }
+        Console.WriteLine("…");
+
+        Console.WriteLine("\nFinally, let us observe the index computed using the quadratic strategy:");
+
+        CDictionary<string, string> demoQ = new CDictionary<
+string,
+string
+>(1009, CDictionary<string, string>.PSSType.Quad);
+        bool[] arrayQ = new bool[1009];
+        for (int i = 0; i < arrayQ.Length; i++)
+        {
+            arrayQ[demoQ.GetIndex("Test", i)] = true;
+            // Uncomment the following if you'd like to see 
+            // which indices are hit.
+            // Console.WriteLine(i +": " + demoQ.GetIndex("Test", i) + ".");
+        }
+        int count = 0;
+        for (int i = 0; i < arrayQ.Length; i++)
+        {
+            if (arrayQ[i]) count++;
+        }
+        Console.WriteLine($"We hit {((decimal)count / arrayQ.Length):p} of the indices.");
+
+        Console.WriteLine("\nFinally, let us observe the index computed using the double hash strategy:");
+        // Demonstrating the double hash strategy:
+                    CDictionary<string, string> demoD = new CDictionary<
+          string,
+          string
+        >(1009, CDictionary<string, string>.PSSType.Double);
+        bool[] arrayD = new bool[1009];
+        for (int i = 0; i < arrayD.Length; i++)
+        {
+            arrayD[demoD.GetIndex("Test", i)] = true;
+            // Uncomment the following if you'd like to see 
+            // which indices are hit.
+            // Console.WriteLine(i +": " + demoD.GetIndex("Test", i) + ".");
+        }
+        count = 0;
+        for (int i = 0; i < arrayD.Length; i++)
+        {
+            if (arrayD[i]) count++;
+        }
+        Console.WriteLine($"We hit {((decimal)count / arrayD.Length):p} of the indices.");
+        // 100% !
     }
 }
diff --git a/source/code/projects/PQueue_array/PQueue/PQueue.cs b/source/code/projects/PQueue_array/PQueue/PQueue.cs
@@ -0,0 +1,137 @@
+﻿using System; // This is required for the exception.
+
+// A bounded priority queue stored in a plain array.
+// Its capacity is fixed when the queue is created.
+// TPriority must be comparable with itself so that
+// priorities can be ranked.
+// Elements are kept in no particular order: Add uses
+// the first free slot, and the most urgent element
+// (the one whose priority is the smallest) is looked
+// up on demand by MinPriority.
+class PQueue<TPriority, TValue> where TPriority : IComparable<TPriority>
+{
+    // A Cell pairs a value with its priority.
+    class Cell
+    {
+        // The smaller the priority, the more urgent the cell.
+        public TPriority Priority { get; set; }
+        // The payload attached to that priority.
+        public TValue Value { get; set; }
+
+        // Both components are supplied at creation time.
+        public Cell(TPriority priorityP, TValue valueP)
+        {
+            Priority = priorityP;
+            Value = valueP;
+        }
+
+        // Rendered as "<value> (priority: <priority>)".
+        public override string ToString()
+        {
+            return string.Concat(Value, " (priority: ", Priority, ")");
+        }
+    }
+
+    // Backing storage: a null entry marks a free slot.
+    private Cell[] mArray;
+
+    // Creates a queue that can hold at most sizeP
+    // elements at any given time (10 by default).
+    public PQueue(int sizeP = 10)
+    {
+        mArray = new Cell[sizeP];
+    }
+
+    // Stores the element in the first free slot of mArray;
+    // throws an ApplicationException when no slot is free.
+    public void Add(TPriority priorityP, TValue valueP)
+    {
+        for (int i = 0; i < mArray.Length; i++)
+        {
+            if (mArray[i] == null)
+            {
+                // First free slot found: fill it and stop.
+                mArray[i] = new Cell(priorityP, valueP);
+                return;
+            }
+        }
+        // Every slot was inspected and none was free.
+        throw new ApplicationException("Could not add the element.");
+    }
+
+    // Returns the index in mArray of the cell whose priority
+    // is the smallest; on ties, the lowest index is returned.
+    // Throws an ApplicationException if every slot is empty.
+    public int MinPriority()
+    {
+        // Position of the first occupied slot,
+        // or -1 if the whole array is empty.
+        int first = Array.FindIndex(mArray, cell => cell != null);
+        if (first == -1)
+        {
+            // Nothing to compare: the queue holds no element.
+            throw new ApplicationException("Queue is empty, no index with minimal priority.");
+        }
+        // Index of the smallest priority seen so far.
+        int best = first;
+        // Slots before first are all empty, so the
+        // scan can start at first.
+        for (int i = first; i < mArray.Length; i++)
+        {
+            Cell candidate = mArray[i];
+            // Free slots are skipped: they have no
+            // Priority property to read.
+            if (candidate == null)
+            {
+                continue;
+            }
+            // Only a strictly smaller priority replaces
+            // the current best, so earlier slots win ties.
+            if (candidate.Priority.CompareTo(mArray[best].Priority) < 0)
+            {
+                best = i;
+            }
+        }
+        return best;
+    }
+
+    // Describes the most urgent cell without removing it.
+    // Relies on MinPriority, hence throws if the queue is empty.
+    public string Peek()
+    {
+        int urgent = MinPriority();
+        return mArray[urgent].ToString();
+    }
+
+    // Removes the most urgent cell and returns its description.
+    // Relies on MinPriority, hence throws if the queue is empty.
+    public string Extract()
+    {
+        int urgent = MinPriority();
+        // Keep a reference to the cell before freeing its slot.
+        Cell removed = mArray[urgent];
+        // A null entry makes the slot available to Add again.
+        mArray[urgent] = null;
+        return removed.ToString();
+    }
+
+    // Lists the slots in array order, one per line;
+    // free slots are displayed as "(empty)".
+    public override string ToString()
+    {
+        // One entry per slot, each ending with a line break.
+        string[] lines = new string[mArray.Length];
+        for (int i = 0; i < lines.Length; i++)
+        {
+            if (mArray[i] == null)
+            {
+                lines[i] = "(empty)" + "\n";
+            }
+            else
+            {
+                lines[i] = mArray[i].ToString() + "\n";
+            }
+        }
+        return string.Concat(lines);
+    }
+}
diff --git a/source/code/projects/PQueue_array/PQueue/Program.cs b/source/code/projects/PQueue_array/PQueue/Program.cs
@@ -0,0 +1,63 @@
+﻿using System;
+
+class Program
+{
+    // Runs actionP; if it throws, prints the exception's message.
+    static void Attempt(Action actionP)
+    {
+        try
+        {
+            actionP();
+        }
+        catch (Exception error)
+        {
+            Console.WriteLine(error.Message);
+        }
+    }
+
+    // Extracts and reports the most urgent element, timesP times in a row.
+    static void RemoveMostUrgent(PQueue<int, string> queueP, int timesP)
+    {
+        for (int i = 0; i < timesP; i++)
+        {
+            Console.Write("Removing most urgent: " + queueP.Extract() + ".\n");
+        }
+    }
+
+    static void Main(string[] args)
+    {
+        PQueue<int, string> triage = new PQueue<int, string>(5);
+        // An empty queue has no most urgent element.
+        Attempt(() => triage.MinPriority());
+
+        /*
+         * Priority levels used below:
+         * https://en.wikipedia.org/wiki/Emergency_Severity_Index
+         * Level 1: Resuscitation
+         * Level 2: Emergent
+         * Level 3: Urgent
+         * Level 4: Semi-Urgent
+         * Level 5: Non-urgent
+        */
+        triage.Add(1, "Cardiac arrest");
+        Console.WriteLine(triage);
+        int urgentIndex = triage.MinPriority();
+        Console.Write("Most urgent priority is at index " + urgentIndex + " with " + triage.Peek() + ".\n");
+        triage.Add(3, "High fever with cough");
+        triage.Add(2, "Asthma attack");
+        triage.Add(5, "Prescription refill ");
+        triage.Add(4, "Simple laceration");
+        Console.WriteLine(triage);
+
+        // All five slots are taken: this Add fails.
+        Attempt(() => triage.Add(3, "Abdominal pain"));
+        RemoveMostUrgent(triage, 2);
+        Console.WriteLine(triage);
+        triage.Add(5, "Suture removal");
+        RemoveMostUrgent(triage, 2);
+        Console.WriteLine(triage);
+        RemoveMostUrgent(triage, 2);
+        // The queue is empty by now: extracting fails.
+        Attempt(() => RemoveMostUrgent(triage, 1));
+    }
+}
diff --git a/source/lectures/data/dictionary.md b/source/lectures/data/dictionary.md
@@ -142,7 +142,7 @@ We illustrate this [point below](#handling-deletion).
 !include`snippetStart="// We use a bool Find sub-routine", snippetEnd="// Done with found."` code/projects/Dictionary/Dictionary/Dictionary.cs
 ```
 
-#### Handling Deletion
+#### Handling deletion
 
 The `Remove` method heavily relies on `FindI`:
 
@@ -186,25 +186,35 @@ Table size | Probe sequence
 
 While still not ideal, we can see that using a prime number for the size allows to "break the cyclicity" every now and then and to obtain additional numbers in the sequence: we go from 4 different indices to 7 per 10 indexes.
 
-The default size of 31 is picked for [various reasons](https://stackoverflow.com/questions/299304/why-does-javas-hashcode-in-string-use-31-as-a-multiplier), some being historical, 
+The default size of 31 is picked for [various reasons](https://stackoverflow.com/questions/299304/why-does-javas-hashcode-in-string-use-31-as-a-multiplier), some being historical.
 
+As we can see, the quadratic probing strategy has an issue that linear probing does not have: it may "skip" some indices, and incorrectly report that the array is full while it is not.
+Why would we choose it, then? Because of clustering.
 
 #### Clustering
 
-In general, the main goal is to avoid having parts of the array filled while other parts are left unused, a situation known as *clustering*.
+An important goal of dictionaries is to avoid having parts of the array filled while other parts are left unused, a situation known as *clustering*.
+This is detrimental, because finding an index requires more and more computation if keys are often given the same or close indices (i.e., we need to call `GetIndex` with higher `countP` values).
+
 This situation *will* happen if too many keys are given the same hash and index, something that is hard to predict since keys will in general not be uniformly distributed and not known ahead of time.
-Linear probing is very bad in solving this problem, since the clusters are "spread out continuously", quadratic probing is an improvement, but only partially solve this issue, since keys with identical hashes will still follow teh same sequence. 
-Double hashing is a bit better at solving this problem, since keys with identical hashes may drift apart significantly when the secondary hash function is applied.
+Linear probing is very bad at solving this problem, since the clusters are "spread out continuously"; quadratic probing is an improvement, but only partially solves this issue, since keys with identical hashes will still follow the same sequence.
+[Double hashing](https://en.wikipedia.org/wiki/Double_hashing) is a bit better at solving this problem, since keys with identical hashes may drift apart significantly when the secondary hash function is applied:
 
-This general discussion relates to performance and requires to measure the dictionary's load factor, which is the number of entries occupied in the hash table divided by the table length (or number of "buckets").
-Of course, open-addressed hash table cannot have a load factor greater than 1, but other techniques, such as chaining, allows for larger load factors.
 
-<!--
-Double hashing, in which the interval between probes is computed by a secondary hash function
+```{download="./code/projects/Dictionary.zip"}
+!include`snippetStart="// Secondary hash function", snippetEnd="// Adding an element"` code/projects/Dictionary/Dictionary/Dictionary.cs
+```
 
-For open addressing schemes, the hash function should also avoid clustering, the mapping of two or more keys to consecutive slots. Such clustering may cause the lookup cost to skyrocket, even if the load factor is low and collisions are infrequent. The popular multiplicative hash is claimed to have particularly poor clustering behavior.[22][4]
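+A secondary hash function **must never evaluate to zero** (otherwise we would just try the same spot again and again), should be as independent from the first hash function as possible, and should help in trying as many slots as possible.
+Note that our function never evaluates to zero: `key.GetHashCode() % table.Length` gives a value strictly between `-table.Length` and `table.Length` (in C#, the result of `%` has the sign of its left operand, and `GetHashCode()` may be negative), so `table.Length - (key.GetHashCode() % table.Length)` is always at least 1. Since the resulting index is itself computed modulo `table.Length`, a step that is a multiple of `table.Length` would have the same effect as a step of zero, so such a value must be avoided as well.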
 
-https://www.javamex.com/tutorials/collections/hash_function_technical_2.shtml
+Our `Main` method includes a test demonstrating the effectiveness of our double hashing technique:
 
-cf. "Implementing Double Hashing" at <https://pressbooks.palni.org/anopenguidetodatastructuresandalgorithms/chapter/hashing-and-hash-tables/>
--->
+```{download="./code/projects/Dictionary.zip"}
+!include`snippetStart="// Demonstrating the double hash strategy:", snippetEnd="// 100% !"` code/projects/Dictionary/Dictionary/Program.cs
+```
+
+While the quadratic method hits about 50% of the indices, the double hashing technique reaches 100%!
+
+This general discussion relates to performance and requires measuring the dictionary's *load factor*, which is the number of entries occupied in the hash table divided by the table length (or number of "buckets").
+Of course, an open-addressed hash table cannot have a load factor greater than 1, but other techniques, such as chaining, allow for larger load factors.
diff --git a/source/lectures/data/priority_queue.md b/source/lectures/data/priority_queue.md
diff --git a/source/order b/source/order