Fast Sorting of a Linked List

Merge-sort is a favourite algorithm for academic study, because it is very fast and it is also very easy to analyse its speed mathematically. However, for most applications it is not a very good sorting algorithm, because it can not be used on arrays without requiring an enormous extra allocation of memory, equal in size to the array being sorted.

It is a different story for linked lists. Merge-sort works perfectly on linked lists, without requiring any extra storage at all. All of the other fast sorting algorithms that are good for arrays are bad for linked lists.

The full program is shown here, followed by some sample runs.


      #include <stdio.h>
      #include <stdlib.h>
      
      int debug=0;
      
      int stats1[100], stats2[100];
      
      struct link
      { int data;
        link * next; };
      
      link * list = NULL;
      
               // Add random number in range 10-99 to front of list
      void add_random_data(link * & list)
      { link * n = new link;
        n->data = random() % 90 + 10;
        n->next = list;
        list = n; }
      
                // count up number of times each value appears in the list,
                // used to check that sorted list really has same data as
                // original list.
      void makestats(link * curr, int count[])
      { for (int i=0; i<100; i+=1)
          count[i]=0;
        while (curr!=NULL)
        { count[curr->data]+=1;
          curr=curr->next; } }
      
                // checks that two count arrays are the same
      int issame(int count1[], int count2[])
      { for (int i=0; i<100; i+=1)
          if (count1[i]!=count2[i])
            return 0;
        return 1; }
      
                // checks that the numbers in a list really do appear
                // in ascending order. If issorted is true, and issame reports
                // that the datra hasn't changed, then the sort was successful
      int issorted(link * curr)
      { if (curr==NULL) return 1;
        int prev=curr->data;
        curr=curr->next;
        while (curr!=NULL)
        { int d=curr->data;
          if (d<prev) return 0;
          prev=d;
          curr=curr->next; }
        return 1; }
      
                // prints an entire list, but with a "|" symbol just before
                // the designated end. Useful for debugging
      void print(link * curr, link * end = NULL)
      { while (curr!=NULL)
        { printf(" %d", curr->data);
          curr=curr->next; 
          if (curr==end) printf(" |"); }
        printf("\n"); }
      
                // prints an entire list, but with a "|" symbol just before
                // the Nth item, Useful for debugging.
      void print(link * curr, int n)
      { if (n==0) printf(" |");
        for (int i=0; curr!=NULL; i+=1)
        { printf(" %d", curr->data);
          curr=curr->next; 
          if (i+1==n) printf(" |"); }
        printf("\n"); }
      
                // an answer object is returned by sort. It contains the
                // first link in the sorted part of the list and the link
                // following the end of the sorted part.
                // I think I forgot to include the constructor in class.
      struct answer
      { link * first;
        link * end;
        answer(link * a, link * b): first(a), end(b) { } };
      
                // to measure the speed, numops is incremented every time
                // a basic operation happens (compariosn followed by conditional
                // movement of data
      int numops=0;
      
                // First param: first link in list to be sorted,
                // Second: number of links to be sorted, all after are left alone,
                // Third: not relevant, used when debugging to indent output.
      answer sort(link * first, int num, int dp=1)
      { if (debug) { printf("%*ssort(%d) of", dp*2, "", num); print(first, num); }
        if (first==NULL)
        { if (debug) printf("%*s= |\n", dp*2, "");
          return answer(NULL, NULL); }
        if (num==0)
        { if (debug) { printf("%*s=", dp*2, ""); print(first, NULL); }
          return answer(first, NULL); }
        if (num==1)
        { if (debug) { printf("%*s=", dp*2, ""); print(first, first->next); }
          numops+=1;
          return answer(first, first->next); }
                // special case of sorting a two element list isn't worth splitting
                // and merging, so is done "by hand".
        if (num==2)
        { if (first->next==NULL)
          { if (debug) { printf("%*s=", dp*2, ""); print(first, NULL); }
            return answer(first, NULL); }
          int d1=first->data;
          int d2=first->next->data;
          if (d1>d2)
          { first->data=d2;
            first->next->data=d1; }
                          // I count this as 2 operations because 2 pieces of data
                          // got moved, but these two operations really take less
                          // time than one normal operation because this is such a
                          // simple step. This just means that the algorithm will
                          // really be just a tiny bit faster than the numops count
                          // would suggest.
          numops+=2;
          if (debug) { printf("%*s=", dp*2, ""); print(first, first->next->next); }
          return answer(first, first->next->next); }
        int halfnum=num/2;
                                            // Sort the first half of the list
        answer firstsort = sort(first, halfnum, dp+1);
        link * halfway = firstsort.end;
                                            // Sort the second half of the list
        answer secondsort = sort(halfway, num-halfnum, dp+1);
        link * end = secondsort.end;
        link * part1 = firstsort.first;
        link * part2 = secondsort.first;
                                            // Now start merging the two...
               // result will be used to build up the final merged list, but it
               // must be built in order, so we need to keep track of where the
               // end of the list is too.
        link * result = NULL, * endres = NULL;
        while (part1!=firstsort.end && part2!=secondsort.end)
        { int d1=part1->data;
          int d2=part2->data;
          link * best=NULL;
          if (d1<d2)
          { best=part1;
            part1=part1->next; }
          else
          { best=part2;
            part2=part2->next; }
                // best is the link from either part1 or part2 which has the
                // smallest value in it. It must be added to the end of the
                // result list.
                      // if this is the first thing in the result list, it is
                      // simply taken as the beginning; if there is already
                      // something in the result list, we can use endres to
                      // find the end and add it there.
          if (result==NULL)
            result=best;
          else
            endres->next=best;
          endres=best;
          numops+=1; }
                        // the loop stops when either of the lists runs out;
                        // the remaining elements of the one non-empty list
                        // must still be appended to the result list.
        while (part1!=firstsort.end)
        { endres->next=part1;
          endres=part1;
          part1=part1->next;
          numops+=1; }
        while (part2!=secondsort.end)
        { endres->next=part2;
          endres=part2;
          part2=part2->next;
          numops+=1; }
                       // and finally the untouched portion of the input list
                       // must be linked to the end, to keep the original list
                       // whole.
        endres->next=end;
        if (debug) { printf("%*s=", dp*2, ""); print(result, end); }
        return answer(result, end); }
      
      void main(int argc, char * argv[])
      {                // starting here, just administrative stuff to deal with
                       // any command line arguments
        if (argc<=1)
        { printf("first argument is length of list\n");
          printf("optional second is 1 for tracing\n");
          printf("                or 0 to prevent printing the list\n");
          exit(1); }
        debug=0;
        int doprint=1;
        int listlen=atol(argv[1]);
        if (argc>=3)
        { int n=atol(argv[2]);
          if (n==1)
            debug=1;
          else if (n==0)
            doprint=0; }
                       // this is where the program really starts
        srandomdev();
        for (int i=0; i<listlen; i+=1)
          add_random_data(list);
        makestats(list, stats1);
        if (doprint)
        { printf("random:");
          print(list); }
        numops=0;
        answer a=sort(list, listlen);
        list=a.first;
        if (doprint)
        { printf("sorted:");
          print(list);  }
        makestats(list, stats2);
        if (issame(stats1, stats2) && issorted(list))
          printf("Sort verified\n");
        else
          printf("Sort FAILED!!!!!\n");
        printf("It took %d operations\n", numops); }
      
      /*
      $ CC llsort.cpp -o llsort
      
      $ llsort 10
      random: 95 72 83 40 75 24 73 95 65 32 |
      sorted: 24 32 40 65 72 73 75 83 95 95 |
      Sort verified
      It took 36 operations
      
      $ llsort 20
      random: 78 93 28 54 43 59 99 57 20 82 71 73 68 21 63 33 81 61 35 51 |
      sorted: 20 21 28 33 35 43 51 54 57 59 61 63 68 71 73 78 81 82 93 99 |
      Sort verified
      It took 92 operations
      
      $ llsort 35
      random: 92 46 55 30 42 79 89 89 50 78 31 14 10 80 87 38 67 10 46 16 51 10 99 29 55 41 67 54 93 56 52 72 52 76 79 |
      sorted: 10 10 10 14 16 29 30 31 38 41 42 46 46 50 51 52 52 54 55 55 56 67 67 72 76 78 79 79 80 87 89 89 92 93 99 |
      Sort verified
      It took 184 operations
      
      $ llsort 20 1
      random: 86 61 83 47 88 61 71 43 80 95 39 96 64 68 99 29 60 18 65 36 |
        sort(20) of 86 61 83 47 88 61 71 43 80 95 39 96 64 68 99 29 60 18 65 36 |
          sort(10) of 86 61 83 47 88 61 71 43 80 95 | 39 96 64 68 99 29 60 18 65 36
            sort(5) of 86 61 83 47 88 | 61 71 43 80 95 39 96 64 68 99 29 60 18 65 36
              sort(2) of 86 61 | 83 47 88 61 71 43 80 95 39 96 64 68 99 29 60 18 65 36
              = 61 86 | 83 47 88 61 71 43 80 95 39 96 64 68 99 29 60 18 65 36
              sort(3) of 83 47 88 | 61 71 43 80 95 39 96 64 68 99 29 60 18 65 36
                sort(1) of 83 | 47 88 61 71 43 80 95 39 96 64 68 99 29 60 18 65 36
                = 83 | 47 88 61 71 43 80 95 39 96 64 68 99 29 60 18 65 36
                sort(2) of 47 88 | 61 71 43 80 95 39 96 64 68 99 29 60 18 65 36
                = 47 88 | 61 71 43 80 95 39 96 64 68 99 29 60 18 65 36
              = 47 83 88 | 61 71 43 80 95 39 96 64 68 99 29 60 18 65 36
            = 47 61 83 86 88 | 61 71 43 80 95 39 96 64 68 99 29 60 18 65 36
            sort(5) of 61 71 43 80 95 | 39 96 64 68 99 29 60 18 65 36
              sort(2) of 61 71 | 43 80 95 39 96 64 68 99 29 60 18 65 36
              = 61 71 | 43 80 95 39 96 64 68 99 29 60 18 65 36
              sort(3) of 43 80 95 | 39 96 64 68 99 29 60 18 65 36
                sort(1) of 43 | 80 95 39 96 64 68 99 29 60 18 65 36
                = 43 | 80 95 39 96 64 68 99 29 60 18 65 36
                sort(2) of 80 95 | 39 96 64 68 99 29 60 18 65 36
                = 80 95 | 39 96 64 68 99 29 60 18 65 36
              = 43 80 95 | 39 96 64 68 99 29 60 18 65 36
            = 43 61 71 80 95 | 39 96 64 68 99 29 60 18 65 36
          = 43 47 61 61 71 80 83 86 88 95 | 39 96 64 68 99 29 60 18 65 36
          sort(10) of 39 96 64 68 99 29 60 18 65 36 |
            sort(5) of 39 96 64 68 99 | 29 60 18 65 36
              sort(2) of 39 96 | 64 68 99 29 60 18 65 36
              = 39 96 | 64 68 99 29 60 18 65 36
              sort(3) of 64 68 99 | 29 60 18 65 36
                sort(1) of 64 | 68 99 29 60 18 65 36
                = 64 | 68 99 29 60 18 65 36
                sort(2) of 68 99 | 29 60 18 65 36
                = 68 99 | 29 60 18 65 36
              = 64 68 99 | 29 60 18 65 36
            = 39 64 68 96 99 | 29 60 18 65 36
            sort(5) of 29 60 18 65 36 |
              sort(2) of 29 60 | 18 65 36
              = 29 60 | 18 65 36
              sort(3) of 18 65 36 |
                sort(1) of 18 | 65 36
                = 18 | 65 36
                sort(2) of 65 36 |
                = 36 65 |
              = 18 36 65 |
            = 18 29 36 60 65 |
          = 18 29 36 39 60 64 65 68 96 99 |
        = 18 29 36 39 43 47 60 61 61 64 65 68 71 80 83 86 88 95 96 99 |
      sorted: 18 29 36 39 43 47 60 61 61 64 65 68 71 80 83 86 88 95 96 99 |
      Sort verified
      It took 92 operations
      
      $ llsort 1024 0
      Sort verified
      It took 10240 operations
                                 // 1024 * log2(1024) = 1024*10 = 10240
      
      $ llsort 2048 0
      Sort verified
      It took 22528 operations
                                 // 2048 * log2(2048) = 2048*11 = 22528
      
      $ llsort 4096 0
      Sort verified
      It took 49152 operations
                                 // 4096 * log2(4096) = 4096*12 = 49152
      
      $ llsort 8192 0
      Sort verified
      It took 106496 operations
                                 // 8192 * log2(8192) = 8192*13 = 106496
      
      $ llsort 1048576 0
      Sort verified
      It took 20971520 operations
                                 // 1048576 * log2(1048576) = 1048576*20 = 20971520 
      
      $ 
      */