/* PAQC - Compression program for Calgary challenge

(C) 2004, Matt Mahoney, mmahoney@cs.fit.edu
Distributed free under GPL, http://www.gnu.org/licenses/gpl.txt

This program was used to create a winning archive in the Calgary
challenge (http://mailcom.com/challenge/) of 645,667 bytes on
Jan. 10, 2004.  Create the archive as follows:

  paqc -1 v news bib book1 book2 paper1 paper2 progc progl progp trans
  paqc -2 w pic
  paqc -3 x geo
  paqc -3 y obj1
  paqc -3 z obj2

The decompressor, d.cpp, is a stripped down version of this program,
removing compression code, some error checking, comments, and excess
spaces.  Either this program or d.cpp could be used to decompress the
above files.

The winning archive was produced by packing d.cpp and the above 5 files
into a RAR archive, choosing best compression for d.cpp and storing the
other files.

PAQC is derived from PAQ6 by tuning it to the Calgary corpus.  It
differs mainly in the addition of a model for pic, selected by the
-2 option, and the removal of models not important to the Calgary
corpus.  It can be used as a general compressor.  The compression
option should be -1 for text, -2 for CCITT images (216 bytes per
scan line), and -3 for other binary files.  Memory usage is fixed
at 190 MB.  Other minor differences:

- CharModel is called AllModel (order 8 context), used for -1 and -3.
- CounterMap allows sharing hash table space among multiple contexts.
- RecordModel, ExeModel, and AnalogModel are removed.
- SparseModel is now ObjModel, used for -3.
- Mixer context is changed slightly.  There is no need to distinguish
  text and binary in the mixer context as the option selects this.
- There are 4X more pointers in the MatchModel hash table.

Compression is about 4K better than PAQ6.  Of this, 3K is due to
PicModel and 1K due to the removal of extraneous models for text.
The other changes result in only very small improvements.  The 5
archive files by themselves total 639,567 bytes.

The previous challenge was a tuned variant of SLIM 12 submitted in Nov.
2002 by Serge Voskoboynikov, 653,720 bytes as a HA archive containing
an executable decompressor (d.exe) and 5 compressed files (named v,w,x,y,z
but in a different order) using the same grouping.  The file sizes are
shown below.  Note that SLIM compresses pic better by 6K and geo by 1K.
PAQC gets better compression mainly due to the sparse models (ObjModel)
for obj1 and obj2, and improved text compression.  Also, source code
submissions were not allowed at the time of the SLIM submission.
I found that RAR compresses d.cpp about 1K smaller than HA.

                     SLIM    PAQC
                   ------  ------
  Archive              HA     RAR
  Decompressor      17920   25671
    (compressed)     9402    5871
  text (10 files)  512519  507426
  pic               20478   26072
  geo               44051   45346
  obj1               8698    8154
  obj2              58426   52569
  archive header      146     229
  -----            ------  ------
  Total            653720  645667

I should note that SLIM has improved greatly in the last year, now
giving better text compression than PAQC (506906 on the 10 concatenated
files), so I would not be surprised if it soon regains the lead.

Thanks to Serge Osnach for introducing me to SSE (in PAQ1SSE/PAQ2) and
the sparse models (PAQ3N).  Also, credit to Eugene Shelwien,
Dmitry Shkarin for suggestions on using multiple character SSE contexts.
Credit to Eugene, Serge, and Jason Schmidt for developing faster and
smaller executables of previous versions.  Credit to Werner Bergmans
and Berto Destasio for testing and evaluating them, including modifications
that improve compression at the cost of more memory.  Credit to
Alexander Ratushnyak who found a bug in PAQ4 decompression, and also
in PAQ6 decompression for very small files (both fixed).
Thanks to Berto for tuning PAQ5, including revised counter state tables,
resulting in PAQ6.  Thanks to Jason for suggesting increasing PSCALE.
Thanks to Fabio Buffoni who pointed out some optimizations in the mixer.
*/

#define PROGNAME "PAQC"  // Please change this if you change the program

#define hash ___hash  // To avoid Digital MARS name collision
#include <cassert>
#include <cctype>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <algorithm>
#include <new>
#include <string>
#include <vector>
#undef hash
using namespace std;

// Fixed-point scale: a probability P is carried around as the integer
// P*PSCALE (the Mixer returns values in the range 0 to PSCALE-1).
const int PSCALE=4096;  // Integer scale for representing probabilities
// Model selection, matching the -1/-2/-3 options described in the header
// comment.  Defaults to 1; presumably overwritten from the command line
// before ch.init() is called -- the code that sets it is not in this part
// of the file.
int MEM=1;  // 1=text, 2=pic, 3=other binary (doesn't affect memory usage)

// Return t.size() converted to int, for any type T that has a size() member.
template <class T>
inline int size(const T& t) {
  return static_cast<int>(t.size());
}

// 8-32 bit unsigned types, adjust as appropriate.
// ProgramChecker asserts at startup that these are exactly 1, 2 and 4 bytes.
// U32 must be exactly 32 bits wide because Random and Hash rely on unsigned
// arithmetic wrapping modulo 2^32.  `unsigned long` is 64 bits on LP64
// platforms (64-bit Linux, macOS, ...), so `unsigned int` is used: it is
// 32 bits wherever sizeof(int)==4, which ProgramChecker also requires.
typedef unsigned char U8;
typedef unsigned short U16;
typedef unsigned int U32;

// Fail if out of memory
// Report an allocation failure on standard output and terminate the
// program with exit status 1.  Does not return.
void handler() {
  fputs("Out of memory\n", stdout);
  exit(1);
}

// A ProgramChecker verifies some environmental assumptions and sets the
// out of memory handler.  It also gets the program starting time.
// The global object programChecker should be initialized before any
// other global objects.

class ProgramChecker {
  clock_t start;  // clock() value captured when the object was constructed
public:
  // Capture the start time, install handler() as the new-handler, then
  // check the type sizes the rest of the program takes for granted.
  ProgramChecker(): start(clock()) {
    set_new_handler(handler);

    // These widths are common but not guaranteed by the language.  The
    // checks are asserts, so they vanish when NDEBUG is defined.
    assert(sizeof(U8)==1);
    assert(sizeof(U16)==2);
    assert(sizeof(U32)==4);
    assert(sizeof(int)==4);
  }

  // When the program started, in clock() ticks
  clock_t start_time() const {
    return start;
  }
} programChecker;

//////////////////////////// rnd ////////////////////////////

// 32-bit random number generator based on r(i) = r(i-24) ^ r(i-55)

class Random {
  U32 table[55];  // Last 55 random values
  int i;  // Index of current random value in table
public:
  Random();
  U32 operator()() {  // Return 32-bit random number
    if (++i==55) i=0;
    if (i>=24) return table[i]^=table[i-24];
    else return table[i]^=table[i+31];
  }
} rnd;

Random::Random(): i(0) {  // Seed the table
  table[0]=123456789;
  table[1]=987654321;
  for (int j=2; j<55; ++j)
    table[j]=table[j-1]*11+table[j-2]*19/16;					
}

//////////////////////////// hash ////////////////////////////

// Hash functoid, returns 32 bit hash of 1-4 chars

class Hash {
  U32 table[8][256];  // Random number table
public:
  Hash() {
    for (int i=7; i>=0; --i)
      for (int j=0; j<256; ++j)
        table[i][j]=rnd();
    assert(table[0][255]==3610026313LU);
  }
  U32 operator()(U8 i0) const {
    return table[0][i0];
  }
  U32 operator()(U8 i0, U8 i1) const {
    return table[0][i0]+table[1][i1];
  }
  U32 operator()(U8 i0, U8 i1, U8 i2) const {
    return table[0][i0]+table[1][i1]+table[2][i2];
  }
  U32 operator()(U8 i0, U8 i1, U8 i2, U8 i3) const {
    return table[0][i0]+table[1][i1]+table[2][i2]+table[3][i3];
  }
} hash;

//////////////////////////// Counter ////////////////////////////

/* A Counter represents a pair (n0, n1) of counts of 0 and 1 bits
in a context.

  get0() -- returns p(0) with weight n = get0()+get1()
  get1() -- returns p(1) with weight n
  add(y) -- increments n_y, where y is 0 or 1 and decreases n_1-y
  priority() -- Returns a priority (n) for hash replacement such that
    higher numbers should be favored.
*/

class Counter {
  U8 state;
  struct E {      // State table entry
    U16 n0, n1;   // get0(), get1()
    U8 s00, s01;  // Next state on input 0 without/with probabilistic incr.
    U8 s10, s11;  // Next state on input 1
    U32 p0, p1;   // Probability of increment x 2^32 on inputs 0, 1
  };
  static E table[];  // State table
public:
  Counter(): state(0) {}
  int get0() const {return table[state].n0;}
  int get1() const {return table[state].n1;}
  int priority() const {return get0()+get1();}
  void add(int y) {
    if (y) {
      if (state<208 || rnd()<table[state].p1)
        state=table[state].s11;
      else
        state=table[state].s10;
    }
    else {
      if (state<208 || rnd()<table[state].p0)
        state=table[state].s01;
      else
        state=table[state].s00;
    }
  }
};

// State table generated by stgen6.cpp
//
// One row per Counter state (255 rows, states 0 to 254).  The columns follow
// the field order of Counter::E:
//   get0, get1  -- values returned by Counter::get0() and get1()
//   s00, s01    -- next state on input 0 without / with the increment
//   s10, s11    -- next state on input 1 without / with the increment
//   p(s01)      -- field p0: probability x 2^32 of taking s01 rather than s00
//   p(s11)      -- field p1: probability x 2^32 of taking s11 rather than s10
// Counter::add() consults p0/p1 only for states >= 208; lower states always
// take s01/s11.  The trailing comment on each row gives the state number and
// a pair labelled (n0,n1) -- presumably the bit counts the state stands for;
// the generator is not part of this file, so confirm against stgen6.cpp.
Counter::E Counter::table[] = {
//  get0 get1 s00 s01 s10 s11  p(s01)       p(s11)    state n0,n1
//  ---- ---- --- --- --- --- ---------  ----------   ----- -- --
    {  0,  0,  0,  2,  0,  1,4294967295u,4294967295u}, // 0 (0,0)
    {  0,  4,  1,  4,  1,  3,4294967295u,4294967295u}, // 1 (0,1)
    {  4,  0,  2,  5,  2,  4,4294967295u,4294967295u}, // 2 (1,0)
    {  0,  8,  1,  4,  3,  6,4294967295u,4294967295u}, // 3 (0,2)
    {  1,  1,  4,  8,  4,  7,4294967295u,4294967295u}, // 4 (1,1)
    {  8,  0,  5,  9,  2,  4,4294967295u,4294967295u}, // 5 (2,0)
    {  0, 12,  1,  4,  6, 10,4294967295u,4294967295u}, // 6 (0,3)
    {  1,  2,  4,  8,  7, 11,4294967295u,4294967295u}, // 7 (1,2)
    {  2,  1,  8, 13,  4,  7,4294967295u,4294967295u}, // 8 (2,1)
    { 12,  0,  9, 14,  2,  4,4294967295u,4294967295u}, // 9 (3,0)
    {  0, 16,  3,  7, 10, 15,4294967295u,4294967295u}, // 10 (0,4)
    {  1,  3,  4,  8, 11, 16,4294967295u,4294967295u}, // 11 (1,3)
    {  1,  1,  8, 13,  7, 11,4294967295u,4294967295u}, // 12 (2,2)
    {  3,  1, 13, 19,  4,  7,4294967295u,4294967295u}, // 13 (3,1)
    { 16,  0, 14, 20,  5,  8,4294967295u,4294967295u}, // 14 (4,0)
    {  0, 20,  3,  7, 15, 21,4294967295u,4294967295u}, // 15 (0,5)
    {  1,  4,  7, 12, 16, 22,4294967295u,4294967295u}, // 16 (1,4)
    {  1,  1,  8, 13, 11, 16,4294967295u,4294967295u}, // 17 (2,3)
    {  1,  1, 13, 19,  7, 11,4294967295u,4294967295u}, // 18 (3,2)
    {  4,  1, 19, 26,  8, 12,4294967295u,4294967295u}, // 19 (4,1)
    { 20,  0, 20, 27,  5,  8,4294967295u,4294967295u}, // 20 (5,0)
    {  0, 24,  6, 11, 21, 28,4294967295u,4294967295u}, // 21 (0,6)
    {  1,  5,  7, 12, 22, 29,4294967295u,4294967295u}, // 22 (1,5)
    {  1,  2, 12, 18, 16, 22,4294967295u,4294967295u}, // 23 (2,4)
    {  1,  1, 13, 19, 11, 16,4294967295u,4294967295u}, // 24 (3,3)
    {  2,  1, 19, 26, 12, 17,4294967295u,4294967295u}, // 25 (4,2)
    {  5,  1, 26, 34,  8, 12,4294967295u,4294967295u}, // 26 (5,1)
    { 24,  0, 27, 35,  9, 13,4294967295u,4294967295u}, // 27 (6,0)
    {  0, 28,  6, 11, 28, 36,4294967295u,4294967295u}, // 28 (0,7)
    {  1,  6, 11, 17, 29, 37,4294967295u,4294967295u}, // 29 (1,6)
    {  1,  2, 12, 18, 22, 29,4294967295u,4294967295u}, // 30 (2,5)
    {  1,  1, 18, 25, 16, 22,4294967295u,4294967295u}, // 31 (3,4)
    {  1,  1, 19, 26, 17, 23,4294967295u,4294967295u}, // 32 (4,3)
    {  2,  1, 26, 34, 12, 17,4294967295u,4294967295u}, // 33 (5,2)
    {  6,  1, 34, 43, 13, 18,4294967295u,4294967295u}, // 34 (6,1)
    { 28,  0, 35, 44,  9, 13,4294967295u,4294967295u}, // 35 (7,0)
    {  0, 32, 10, 16, 36, 45,4294967295u,4294967295u}, // 36 (0,8)
    {  1,  7, 11, 17, 37, 46,4294967295u,4294967295u}, // 37 (1,7)
    {  1,  3, 17, 24, 29, 37,4294967295u,4294967295u}, // 38 (2,6)
    {  1,  1, 18, 25, 22, 29,4294967295u,4294967295u}, // 39 (3,5)
    {  1,  1, 25, 33, 23, 30,4294967295u,4294967295u}, // 40 (4,4)
    {  1,  1, 26, 34, 17, 23,4294967295u,4294967295u}, // 41 (5,3)
    {  3,  1, 34, 43, 18, 24,4294967295u,4294967295u}, // 42 (6,2)
    {  7,  1, 43, 53, 13, 18,4294967295u,4294967295u}, // 43 (7,1)
    { 32,  0, 44, 54, 14, 19,4294967295u,4294967295u}, // 44 (8,0)
    {  0, 36, 10, 16, 45, 55,4294967295u,4294967295u}, // 45 (0,9)
    {  1,  8, 16, 23, 46, 56,4294967295u,4294967295u}, // 46 (1,8)
    {  1,  3, 17, 24, 37, 46,4294967295u,4294967295u}, // 47 (2,7)
    {  1,  2, 24, 32, 29, 37,4294967295u,4294967295u}, // 48 (3,6)
    {  1,  1, 25, 33, 30, 38,4294967295u,4294967295u}, // 49 (4,5)
    {  1,  1, 33, 42, 23, 30,4294967295u,4294967295u}, // 50 (5,4)
    {  2,  1, 34, 43, 24, 31,4294967295u,4294967295u}, // 51 (6,3)
    {  3,  1, 43, 53, 18, 24,4294967295u,4294967295u}, // 52 (7,2)
    {  8,  1, 53, 61, 19, 25,4294967295u,4294967295u}, // 53 (8,1)
    { 36,  0, 54, 62, 14, 19,4294967295u,4294967295u}, // 54 (9,0)
    {  0, 40, 15, 22, 55, 63,4294967295u,4294967295u}, // 55 (0,10)
    {  1,  9, 16, 23, 56, 64,4294967295u,4294967295u}, // 56 (1,9)
    {  1,  4, 23, 31, 46, 56,4294967295u,4294967295u}, // 57 (2,8)
    {  1,  2, 24, 32, 37, 46,4294967295u,4294967295u}, // 58 (3,7)
    {  2,  1, 43, 53, 24, 31,4294967295u,4294967295u}, // 59 (7,3)
    {  4,  1, 53, 61, 25, 32,4294967295u,4294967295u}, // 60 (8,2)
    {  9,  1, 61, 69, 19, 25,4294967295u,4294967295u}, // 61 (9,1)
    { 40,  0, 62, 70, 20, 26,4294967295u,4294967295u}, // 62 (10,0)
    {  0, 44, 15, 22, 63, 71,4294967295u,4294967295u}, // 63 (0,11)
    {  1, 10, 22, 30, 64, 72,4294967295u,4294967295u}, // 64 (1,10)
    {  1,  4, 23, 31, 56, 64,4294967295u,4294967295u}, // 65 (2,9)
    {  1,  2, 31, 40, 46, 56,4294967295u,4294967295u}, // 66 (3,8)
    {  2,  1, 53, 61, 32, 40,4294967295u,4294967295u}, // 67 (8,3)
    {  4,  1, 61, 69, 25, 32,4294967295u,4294967295u}, // 68 (9,2)
    { 10,  1, 69, 77, 26, 33,4294967295u,4294967295u}, // 69 (10,1)
    { 44,  0, 70, 78, 20, 26,4294967295u,4294967295u}, // 70 (11,0)
    {  0, 48, 21, 29, 71, 79,4294967295u,4294967295u}, // 71 (0,12)
    {  1, 11, 22, 30, 72, 80,4294967295u,4294967295u}, // 72 (1,11)
    {  1,  5, 30, 39, 64, 72,4294967295u,4294967295u}, // 73 (2,10)
    {  1,  3, 31, 40, 56, 64,4294967295u,4294967295u}, // 74 (3,9)
    {  3,  1, 61, 69, 32, 40,4294967295u,4294967295u}, // 75 (9,3)
    {  5,  1, 69, 77, 33, 41,4294967295u,4294967295u}, // 76 (10,2)
    { 11,  1, 77, 85, 26, 33,4294967295u,4294967295u}, // 77 (11,1)
    { 48,  0, 78, 86, 27, 34,4294967295u,4294967295u}, // 78 (12,0)
    {  0, 52, 21, 29, 79, 87,4294967295u,4294967295u}, // 79 (0,13)
    {  1, 12, 29, 38, 80, 88,4294967295u,4294967295u}, // 80 (1,12)
    {  1,  5, 30, 39, 72, 80,4294967295u,4294967295u}, // 81 (2,11)
    {  1,  3, 39, 49, 64, 72,4294967295u,4294967295u}, // 82 (3,10)
    {  3,  1, 69, 77, 41, 50,4294967295u,4294967295u}, // 83 (10,3)
    {  5,  1, 77, 85, 33, 41,4294967295u,4294967295u}, // 84 (11,2)
    { 12,  1, 85, 91, 34, 42,4294967295u,4294967295u}, // 85 (12,1)
    { 52,  0, 86, 92, 27, 34,4294967295u,4294967295u}, // 86 (13,0)
    {  0, 56, 28, 37, 87, 93,4294967295u,4294967295u}, // 87 (0,14)
    {  1, 13, 29, 38, 88, 94,4294967295u,4294967295u}, // 88 (1,13)
    {  1,  6, 38, 48, 80, 88,4294967295u,4294967295u}, // 89 (2,12)
    {  6,  1, 85, 91, 42, 51,4294967295u,4294967295u}, // 90 (12,2)
    { 13,  1, 91, 97, 34, 42,4294967295u,4294967295u}, // 91 (13,1)
    { 56,  0, 92, 98, 35, 43,4294967295u,4294967295u}, // 92 (14,0)
    {  0, 60, 28, 37, 93, 99,4294967295u,4294967295u}, // 93 (0,15)
    {  1, 14, 37, 47, 94,100,4294967295u,4294967295u}, // 94 (1,14)
    {  1,  6, 38, 48, 88, 94,4294967295u,4294967295u}, // 95 (2,13)
    {  6,  1, 91, 97, 42, 51,4294967295u,4294967295u}, // 96 (13,2)
    { 14,  1, 97,103, 43, 52,4294967295u,4294967295u}, // 97 (14,1)
    { 60,  0, 98,104, 35, 43,4294967295u,4294967295u}, // 98 (15,0)
    {  0, 64, 36, 46, 99,105,4294967295u,4294967295u}, // 99 (0,16)
    {  1, 15, 37, 47,100,106,4294967295u,4294967295u}, // 100 (1,15)
    {  1,  7, 47, 58, 94,100,4294967295u,4294967295u}, // 101 (2,14)
    {  7,  1, 97,103, 52, 59,4294967295u,4294967295u}, // 102 (14,2)
    { 15,  1,103,109, 43, 52,4294967295u,4294967295u}, // 103 (15,1)
    { 64,  0,104,110, 44, 53,4294967295u,4294967295u}, // 104 (16,0)
    {  0, 68, 36, 46,105,111,4294967295u,4294967295u}, // 105 (0,17)
    {  1, 16, 46, 57,106,112,4294967295u,4294967295u}, // 106 (1,16)
    {  1,  7, 47, 58,100,106,4294967295u,4294967295u}, // 107 (2,15)
    {  7,  1,103,109, 52, 59,4294967295u,4294967295u}, // 108 (15,2)
    { 16,  1,109,113, 53, 60,4294967295u,4294967295u}, // 109 (16,1)
    { 68,  0,110,114, 44, 53,4294967295u,4294967295u}, // 110 (17,0)
    {  0, 72, 45, 56,111,115,4294967295u,4294967295u}, // 111 (0,18)
    {  1, 17, 46, 57,112,116,4294967295u,4294967295u}, // 112 (1,17)
    { 17,  1,113,119, 53, 60,4294967295u,4294967295u}, // 113 (17,1)
    { 72,  0,114,120, 54, 61,4294967295u,4294967295u}, // 114 (18,0)
    {  0, 76, 45, 56,115,121,4294967295u,4294967295u}, // 115 (0,19)
    {  1, 18, 56, 65,116,122,4294967295u,4294967295u}, // 116 (1,18)
    {  1,  8, 57, 66,112,116,4294967295u,4294967295u}, // 117 (2,17)
    {  8,  1,113,119, 60, 67,4294967295u,4294967295u}, // 118 (17,2)
    { 18,  1,119,125, 61, 68,4294967295u,4294967295u}, // 119 (18,1)
    { 76,  0,120,126, 54, 61,4294967295u,4294967295u}, // 120 (19,0)
    {  0, 80, 55, 64,121,127,4294967295u,4294967295u}, // 121 (0,20)
    {  1, 19, 56, 65,122,128,4294967295u,4294967295u}, // 122 (1,19)
    {  1,  9, 65, 74,116,122,4294967295u,4294967295u}, // 123 (2,18)
    {  9,  1,119,125, 68, 75,4294967295u,4294967295u}, // 124 (18,2)
    { 19,  1,125,131, 61, 68,4294967295u,4294967295u}, // 125 (19,1)
    { 80,  0,126,132, 62, 69,4294967295u,4294967295u}, // 126 (20,0)
    {  0, 84, 55, 64,127,133,4294967295u,4294967295u}, // 127 (0,21)
    {  1, 20, 64, 73,128,134,4294967295u,4294967295u}, // 128 (1,20)
    {  1,  9, 65, 74,122,128,4294967295u,4294967295u}, // 129 (2,19)
    {  9,  1,125,131, 68, 75,4294967295u,4294967295u}, // 130 (19,2)
    { 20,  1,131,137, 69, 76,4294967295u,4294967295u}, // 131 (20,1)
    { 84,  0,132,138, 62, 69,4294967295u,4294967295u}, // 132 (21,0)
    {  0, 88, 63, 72,133,139,4294967295u,4294967295u}, // 133 (0,22)
    {  1, 21, 64, 73,134,140,4294967295u,4294967295u}, // 134 (1,21)
    {  1, 10, 73, 82,128,134,4294967295u,4294967295u}, // 135 (2,20)
    { 10,  1,131,137, 76, 83,4294967295u,4294967295u}, // 136 (20,2)
    { 21,  1,137,143, 69, 76,4294967295u,4294967295u}, // 137 (21,1)
    { 88,  0,138,144, 70, 77,4294967295u,4294967295u}, // 138 (22,0)
    {  0, 92, 63, 72,139,145,4294967295u,4294967295u}, // 139 (0,23)
    {  1, 22, 72, 81,140,146,4294967295u,4294967295u}, // 140 (1,22)
    {  1, 10, 73, 82,134,140,4294967295u,4294967295u}, // 141 (2,21)
    { 10,  1,137,143, 76, 83,4294967295u,4294967295u}, // 142 (21,2)
    { 22,  1,143,147, 77, 84,4294967295u,4294967295u}, // 143 (22,1)
    { 92,  0,144,148, 70, 77,4294967295u,4294967295u}, // 144 (23,0)
    {  0, 96, 71, 80,145,149,4294967295u,4294967295u}, // 145 (0,24)
    {  1, 23, 72, 81,146,150,4294967295u,4294967295u}, // 146 (1,23)
    { 23,  1,147,151, 77, 84,4294967295u,4294967295u}, // 147 (23,1)
    { 96,  0,148,152, 78, 85,4294967295u,4294967295u}, // 148 (24,0)
    {  0,100, 63, 72,149,153,4294967295u,4294967295u}, // 149 (0,25)
    {  1, 24, 80, 89,150,154,4294967295u,4294967295u}, // 150 (1,24)
    { 24,  1,151,155, 85, 90,4294967295u,4294967295u}, // 151 (24,1)
    {100,  0,152,156, 70, 77,4294967295u,4294967295u}, // 152 (25,0)
    {  0,104, 63, 72,153,157,4294967295u,4294967295u}, // 153 (0,26)
    {  1, 25, 72, 81,154,158,4294967295u,4294967295u}, // 154 (1,25)
    { 25,  1,155,159, 77, 84,4294967295u,4294967295u}, // 155 (25,1)
    {104,  0,156,160, 70, 77,4294967295u,4294967295u}, // 156 (26,0)
    {  0,108, 63, 72,157,161,4294967295u,4294967295u}, // 157 (0,27)
    {  1, 26, 72, 81,158,162,4294967295u,4294967295u}, // 158 (1,26)
    { 26,  1,159,163, 77, 84,4294967295u,4294967295u}, // 159 (26,1)
    {108,  0,160,164, 70, 77,4294967295u,4294967295u}, // 160 (27,0)
    {  0,112, 63, 72,161,165,4294967295u,4294967295u}, // 161 (0,28)
    {  1, 27, 72, 81,162,166,4294967295u,4294967295u}, // 162 (1,27)
    { 27,  1,163,167, 77, 84,4294967295u,4294967295u}, // 163 (27,1)
    {112,  0,164,168, 70, 77,4294967295u,4294967295u}, // 164 (28,0)
    {  0,116, 63, 72,165,169,4294967295u,4294967295u}, // 165 (0,29)
    {  1, 28, 72, 81,166,170,4294967295u,4294967295u}, // 166 (1,28)
    { 28,  1,167,171, 77, 84,4294967295u,4294967295u}, // 167 (28,1)
    {116,  0,168,172, 70, 77,4294967295u,4294967295u}, // 168 (29,0)
    {  0,120, 63, 72,169,173,4294967295u,4294967295u}, // 169 (0,30)
    {  1, 29, 72, 81,170,174,4294967295u,4294967295u}, // 170 (1,29)
    { 29,  1,171,175, 77, 84,4294967295u,4294967295u}, // 171 (29,1)
    {120,  0,172,176, 70, 77,4294967295u,4294967295u}, // 172 (30,0)
    {  0,124, 63, 72,173,177,4294967295u,4294967295u}, // 173 (0,31)
    {  1, 30, 72, 81,174,178,4294967295u,4294967295u}, // 174 (1,30)
    { 30,  1,175,179, 77, 84,4294967295u,4294967295u}, // 175 (30,1)
    {124,  0,176,180, 70, 77,4294967295u,4294967295u}, // 176 (31,0)
    {  0,128, 63, 72,177,181,4294967295u,4294967295u}, // 177 (0,32)
    {  1, 31, 72, 81,178,182,4294967295u,4294967295u}, // 178 (1,31)
    { 31,  1,179,183, 77, 84,4294967295u,4294967295u}, // 179 (31,1)
    {128,  0,180,184, 70, 77,4294967295u,4294967295u}, // 180 (32,0)
    {  0,132, 63, 72,181,185,4294967295u,4294967295u}, // 181 (0,33)
    {  1, 32, 72, 81,182,186,4294967295u,4294967295u}, // 182 (1,32)
    { 32,  1,183,187, 77, 84,4294967295u,4294967295u}, // 183 (32,1)
    {132,  0,184,188, 70, 77,4294967295u,4294967295u}, // 184 (33,0)
    {  0,136, 63, 72,185,189,4294967295u,4294967295u}, // 185 (0,34)
    {  1, 33, 72, 81,186,190,4294967295u,4294967295u}, // 186 (1,33)
    { 33,  1,187,191, 77, 84,4294967295u,4294967295u}, // 187 (33,1)
    {136,  0,188,192, 70, 77,4294967295u,4294967295u}, // 188 (34,0)
    {  0,140, 63, 72,189,193,4294967295u,4294967295u}, // 189 (0,35)
    {  1, 34, 72, 81,190,194,4294967295u,4294967295u}, // 190 (1,34)
    { 34,  1,191,195, 77, 84,4294967295u,4294967295u}, // 191 (34,1)
    {140,  0,192,196, 70, 77,4294967295u,4294967295u}, // 192 (35,0)
    {  0,144, 71, 80,193,197,4294967295u,4294967295u}, // 193 (0,36)
    {  1, 35, 72, 81,194,198,4294967295u,4294967295u}, // 194 (1,35)
    { 35,  1,195,199, 77, 84,4294967295u,4294967295u}, // 195 (35,1)
    {144,  0,196,200, 78, 85,4294967295u,4294967295u}, // 196 (36,0)
    {  0,148, 71, 80,197,201,4294967295u,4294967295u}, // 197 (0,37)
    {  1, 36, 80, 89,198,202,4294967295u,4294967295u}, // 198 (1,36)
    { 36,  1,199,203, 85, 90,4294967295u,4294967295u}, // 199 (36,1)
    {148,  0,200,204, 78, 85,4294967295u,4294967295u}, // 200 (37,0)
    {  0,152, 71, 80,201,205,4294967295u,4294967295u}, // 201 (0,38)
    {  1, 37, 80, 89,202,206,4294967295u,4294967295u}, // 202 (1,37)
    { 37,  1,203,207, 85, 90,4294967295u,4294967295u}, // 203 (37,1)
    {152,  0,204,208, 78, 85,4294967295u,4294967295u}, // 204 (38,0)
    {  0,156, 71, 80,205,209,4294967295u,4294967295u}, // 205 (0,39)
    {  1, 38, 80, 89,206,210,4294967295u,4294967295u}, // 206 (1,38)
    { 38,  1,207,211, 85, 90,4294967295u,4294967295u}, // 207 (38,1)
    {156,  0,208,212, 78, 85,4294967295u,4294967295u}, // 208 (39,0)
    {  0,160, 71, 80,209,215,4294967295u,1073741823u}, // 209 (0,40)
    {  1, 39, 80, 89,210,213,4294967295u,4294967295u}, // 210 (1,39)
    { 39,  1,211,214, 85, 90,4294967295u,4294967295u}, // 211 (39,1)
    {160,  0,212,216, 78, 85,1073741823u,4294967295u}, // 212 (40,0)
    {  1, 40, 80, 89,213,217,4294967295u,1073741823u}, // 213 (1,40)
    { 40,  1,214,218, 85, 90,1073741823u,4294967295u}, // 214 (40,1)
    {  0,176, 71, 80,215,219,4294967295u,1073741823u}, // 215 (0,44)
    {176,  0,216,220, 78, 85,1073741823u,4294967295u}, // 216 (44,0)
    {  1, 44, 80, 89,217,221,4294967295u,1073741823u}, // 217 (1,44)
    { 44,  1,218,222, 85, 90,1073741823u,4294967295u}, // 218 (44,1)
    {  0,192, 71, 80,219,223,4294967295u, 536870911u}, // 219 (0,48)
    {192,  0,220,224, 78, 85, 536870911u,4294967295u}, // 220 (48,0)
    {  1, 48, 80, 89,221,225,4294967295u, 536870911u}, // 221 (1,48)
    { 48,  1,222,226, 85, 90, 536870911u,4294967295u}, // 222 (48,1)
    {  0,224, 79, 88,223,227,4294967295u, 536870911u}, // 223 (0,56)
    {224,  0,224,228, 86, 91, 536870911u,4294967295u}, // 224 (56,0)
    {  1, 56, 88, 95,225,229,4294967295u, 536870911u}, // 225 (1,56)
    { 56,  1,226,230, 91, 96, 536870911u,4294967295u}, // 226 (56,1)
    {  0,256, 87, 94,227,231,4294967295u, 134217727u}, // 227 (0,64)
    {256,  0,228,232, 92, 97, 134217727u,4294967295u}, // 228 (64,0)
    {  1, 64, 94,101,229,233,4294967295u, 134217727u}, // 229 (1,64)
    { 64,  1,230,234, 97,102, 134217727u,4294967295u}, // 230 (64,1)
    {  0,384, 93,100,231,235,4294967295u, 134217727u}, // 231 (0,96)
    {384,  0,232,236, 98,103, 134217727u,4294967295u}, // 232 (96,0)
    {  1, 96,100,107,233,237,4294967295u, 134217727u}, // 233 (1,96)
    { 96,  1,234,238,103,108, 134217727u,4294967295u}, // 234 (96,1)
    {  0,512,105,112,235,239,4294967295u, 134217727u}, // 235 (0,128)
    {512,  0,236,240,110,113, 134217727u,4294967295u}, // 236 (128,0)
    {  1,128,112,117,237,241,4294967295u, 134217727u}, // 237 (1,128)
    {128,  1,238,242,113,118, 134217727u,4294967295u}, // 238 (128,1)
    {  0,640,111,116,239,243,4294967295u, 134217727u}, // 239 (0,160)
    {640,  0,240,244,114,119, 134217727u,4294967295u}, // 240 (160,0)
    {  1,160,116,123,241,245,4294967295u, 134217727u}, // 241 (1,160)
    {160,  1,242,246,119,124, 134217727u,4294967295u}, // 242 (160,1)
    {  0,768,115,122,243,247,4294967295u, 134217727u}, // 243 (0,192)
    {768,  0,244,248,120,125, 134217727u,4294967295u}, // 244 (192,0)
    {  1,192,122,129,245,249,4294967295u, 134217727u}, // 245 (1,192)
    {192,  1,246,250,125,130, 134217727u,4294967295u}, // 246 (192,1)
    {  0,896,121,128,247,251,4294967295u, 138547332u}, // 247 (0,224)
    {896,  0,248,252,126,131, 138547332u,4294967295u}, // 248 (224,0)
    {  1,224,128,135,249,253,4294967295u, 138547332u}, // 249 (1,224)
    {224,  1,250,254,131,136, 138547332u,4294967295u}, // 250 (224,1)
    { 0,1020,127,134,251,251,4294967295u,         0u}, // 251 (0,255)
    {1020, 0,252,252,132,137,         0u,4294967295u}, // 252 (255,0)
    {  1,255,134,141,253,253,4294967295u,         0u}, // 253 (1,255)
    {255,  1,254,254,137,142,         0u,4294967295u}  // 254 (255,1)
};

//////////////////////////// ch ////////////////////////////

/* ch is a global object that provides common services to models.
It stores all the input so far in a rotating buffer of the last N bytes

  ch -- Global object
  ch.init() -- Initialize (after MEM is set)
  ch(i) -- Returns i'th byte from end
  ch(0) -- Returns the 0-7 bits of the partially read byte with a leading 1
  ch()  -- ch(0)
  ch.update(y) -- Appends bit y to the buffer
  ch.pos() -- The number of whole bytes appended, possibly > N
  ch.bpos() -- The number of bits (0-7) of the current partial byte at (0)
  ch[i] -- ch(pos()-i)
  ch.lo() -- Low order nibble so far (1-15 with leading 1)
  ch.hi() -- Previous nibble, 0-15 (no leading 1 bit)
  ch.pos(c) -- Position of the last occurrence of byte c (0-255)
  ch.pos(c, i) -- Position of the i'th to last occurrence, i = 0 to 3
*/
class Ch {
  U32 N;  // Buffer size
  U8 *buf;  // [N] last N bytes
  U32 p;  // pos()
  U32 bp;  // bpos()
  U32 hi_nibble, lo_nibble;  // hi(), lo()
  U32 lpos[256][4];  // pos(c, i)
public:
  Ch(): N(0), buf(0), p(0), bp(0), hi_nibble(0), lo_nibble(1) {
    memset(lpos, 0, 256*4*sizeof(U32));
  }
  void init() {
    N = 1 << 22;
    buf=(U8*)calloc(N, 1);
    if (!buf)
      handler();
    buf[0]=1;
  }
  U32 operator()(int i) const {return buf[(p-i)&(N-1)];}
  U32 operator()() const {return buf[p&(N-1)];}
  void update(int y) {
    U8& r=buf[p&(N-1)];
    r+=r+y;
    if (++bp==8) {
      lpos[r][3]=lpos[r][2];
      lpos[r][2]=lpos[r][1];
      lpos[r][1]=lpos[r][0];
      lpos[r][0]=p;
      bp=0;
      ++p;
      buf[p&(N-1)]=1;
    }
    if ((lo_nibble+=lo_nibble+y)>=16) {
      hi_nibble=lo_nibble-16;
      lo_nibble=1;
    }
  }
  U32 pos() const {return p;}
  U32 pos(U8 c, int i=0) const {return lpos[c][i&3];}
  U32 bpos() const {return bp;}
  U32 operator[](int i) const {return buf[i&(N-1)];}
  U32 hi() const {return hi_nibble;}
  U32 lo() const {return lo_nibble;}
} ch;  // Global object

//////////////////////////// Hashtable ////////////////////////////

/* A Hashtable stores Counters.  It is organized to minimize cache
misses for 64-byte cache lines.  The size is fixed at 2^n bytes.  It
uses LRU replacement for buckets of size 4, except that the next to
oldest element is replaced if it has lower priority than the oldest.
Each bucket represents 15 counters for a context on a half-byte boundary.

  Hashtable<Counter> ht(n) -- Create hash table of 2^n bytes (15/16 of
    these are 1-byte Counters).
  ht.set(h, sel) -- Set major context selected by sel to h, a 32 bit hash
    of a context ending on a nibble (4-bit) boundary.  0 <= sel < M.
  ht(c, sel) -- Retrieve a reference to counter associated with partial
    nibble c (1-15) in context h[sel].

Normally there should be 4 calls to ht(c, sel) after each ht.set(h, sel).
*/

template<class T>
class Hashtable {
private:
  const U32 N;  // log2 size in 16-byte elements
  struct HashElement {
    U8 checksum;  // Checksum of context, used to detect collisions
    T c[15];  // 1-byte counters in minor context c
    HashElement(): checksum(0) {}
  };
  HashElement *table;  // [2^N]
  U32 cxt[MODELS];  // major contexts
public:
  Hashtable(U32 n);

  // Uncomment this to print hash table usage statistics
  /*
  ~Hashtable() {
    int c1=0, c2=0;
    for (int i=0; i<(1<<N); ++i) {
      if (table[i].c[0].get0() || table[i].c[0].get1())
        ++c1;
      if (table[i].c[0].get0()==4 && table[i].c[0].get1()==0)
        ++c2;
      if (table[i].c[0].get0()==0 && table[i].c[0].get1()==4)
        ++c2;
    }
    printf("Hash table %1.4f%% of %d filled, %1.4f%% used once\n",
      100.0*c1/(1<<N), 1<<N, 100.0*c2/(1<<N));
  } */

  // Set major context[sel] to h, a 32 bit hash.  Create element if needed.
  void set(U32 h, U32 sel) {
    assert(sel<MODELS);
    h^=hash(h, h>>10, h>>21, sel);

    // Search 4 elements for h within a 64-byte cache line
    const U8 checksum=(h>>24)^h;
    const U32 lo= (h>>(32-N)) & -4;
    const U32 hi=lo+4;
    U32 i;
    for (i=lo; i<hi; ++i) {
      U32 pri=table[i].c[0].priority();
      if (table[i].checksum==checksum) { // found
        cxt[sel]=i;
        break;
      }
      else if (pri==0) {  // empty bucket
        table[i].checksum=checksum;
        cxt[sel]=i;
        break;
      }
    }

    // Put new element in front, pushing the lower priority of the two
    // oldest off the back
    if (i==hi) {
      cxt[sel]=lo;
      if (table[lo+2].c[0].priority()<table[lo+3].c[0].priority())
        memmove(table+lo+1, table+lo, 32);
      else
        memmove(table+lo+1, table+lo, 48);
      memset(table+lo, 0, 16);
      table[cxt[sel]].checksum=checksum;
    }

    // Move newest to front
    else if (cxt[sel]!=lo) {
      HashElement he=table[cxt[sel]];
      memmove(table+lo+1, table+lo, (cxt[sel]-lo)*16);
      table[lo]=he;
      cxt[sel]=lo;
    }
  }

  // Get element c (1-15) of bucket cxt[sel]
  T& operator()(U32 c, U32 sel) {
    --c;
    assert(c<15);
    assert(sel<MODELS);
    return table[cxt[sel]].c[c];
  }
};

template <class T>
Hashtable<T>::Hashtable(U32 n): N(n>4?n-4:1), table(0) {
  assert(sizeof(HashElement)==16);
  assert(sizeof(char)==1);
  assert(sizeof(cxt[0])==4);
  memset(cxt, 0, MODELS*4);

  // Align the hash table on a 64 byte cache page boundary
  char *p=(char*)calloc((16<<N)+64, 1);
  if (!cxt || !p)
    handler();
  p+=64-(((int)p)&63);  // Aligned
  table=(HashElement*)p;
}

//////////////////////////// mixer ////////////////////////////

/* A Mixer combines a weighted set of probabilities (expressed as 0 and
1 counts) into a single probability P(1) that the next bit will be a 1.

  Mixer m(C);      -- Create Mixer with C sets of N weights (N is fixed)
  m.write(n0, n1); -- Store a prediction P(1) = n1/(n0+n1), with confidence
                      0 <= n0+n1 < 1024.  There should be at most N calls
                      to write() followed by predict() and update().
                      Write order should be consistent.
  m.add(n0, n1);   -- Adds to a previous write.
  m.predict(c);    -- Return P(1)*PSCALE (range 0 to PSCALE-1) for
                      weight set c (0 to C-1).
  m.update(y);     -- Tune the N internal weights for set c such that
                      predict(c) would return a result closer to y*PSCALE,
                      y = 0 or 1.
*/

// Upper bound on the number of count pairs written to a Mixer per bit.
// It sizes the Mixer count and weight arrays and the per-selector context
// arrays (cxt[MODELS]) of the counter maps.
const U32 MODELS=64;

class Mixer {
  const int C;
  U32 *bc0, *bc1;  // 0,1 counts for MODELS models
  U32 (*wt)[MODELS];  // wt[c][n] is n'th weight in context c
  int n;  // number of bit count pairs written
  int c;  // weight set context
public:
  Mixer(int C_);
  ~Mixer();
  U32 getN() const {return MODELS;}
  U32 getC() const {return C;}

  // Store next counts n0, n1 from model
  void write(int n0, int n1) {
    bc0[n]=n0;
    bc1[n]=n1;
    ++n;
  }

  // Add to the last write
  void add(int n0, int n1) {
    bc0[n-1]+=n0;
    bc1[n-1]+=n1;
  }
  int predict(int c_);
  void update(int y);
};

// Return weighted average of models in context c_
int Mixer::predict(int c_) {
  assert(n>0 && n<=MODELS);
  assert(c_>=0 && c_<C);
  c=c_;
  int n0=1, n1=n0;
  for (int j=0; j<n; ++j) {
    U32 w=wt[c][j];
    n0+=bc0[j]*w;
    n1+=bc1[j]*w;
  }
  int sum=n0+n1;
  while (sum>2000000000/PSCALE) sum/=4, n1/=4;
  assert(sum>0);
  return (PSCALE-1)*n1/sum;
}

// Adjust the weights by gradient descent to reduce cost of bit y.
// Operates on the weight set chosen by the last predict() and on the n
// count pairs written since the last update(); n is reset to 0 afterwards.
// rnd() supplies the low bits that are added before each right shift.
void Mixer::update(int y) {
  // Weighted totals of the 0 and 1 counts, with 48 added to every weight
  U32 s0=0, s1=0;
  for (int i=0; i<n; ++i) {
    s0+=(wt[c][i]+48)*bc0[i];
    s1+=(wt[c][i]+48)*bc1[i];
  }
  if (s0>0 && s1>0) {  // weights are left unchanged if either total is 0
    const U32 s=s0+s1;
    const U32 sy=y?s1:s0;  // total for the bit that actually occurred
    // + binds tighter than >>: (0xffffffff/sy + random 10 bits) >> 10
    const U32 sy1=0xffffffff/sy+(rnd()&1023) >> 10;
    // This s1 is a new constant that hides the outer s1 from here on
    const U32 s1 =0xffffffff/s +(rnd()&1023) >> 10;
    for (int i=0; i<n; ++i) {
      // Difference of the two scaled terms plus 8 random bits, shifted
      // right by 8 after conversion to int
      const int dw=int((y?bc1[i]:bc0[i])*sy1-(bc0[i]+bc1[i])*s1
         + (rnd()&255)) >> 8;
      wt[c][i]=min(65535, max(1, int(wt[c][i]+dw)));  // keep weight in 1..65535
    }
  }
  n=0;  // start a new round of write() calls
}

// Allocate the count arrays and C_ weight sets of MODELS weights each.
// All counts start at 0 and all weights at 1.
Mixer::Mixer(int C_): C(C_), bc0(new U32[MODELS]), bc1(new U32[MODELS]),
                      wt(new U32[C_][MODELS]), n(0), c(0) {
  for (U32 k=0; k<MODELS; ++k) {
    bc0[k]=0;
    bc1[k]=0;
  }
  for (int set=0; set<C; ++set) {
    U32* const row=wt[set];
    for (U32 k=0; k<MODELS; ++k)
      row[k]=1;
  }
}

// Destructor.  The active body is empty: nothing is printed and the arrays
// allocated by the constructor are not released here.  The disabled code
// below dumps the weights of up to 16 weight sets.
Mixer::~Mixer() {
/*
  // Uncomment this to print the weights.  This is useful for testing
  // new models or weight vector contexts.
  if (n==0)
    return;
  printf("  ");
  for (int i=0; i<n; ++i)
    printf("%4d", i);
  printf("\n");
  fflush(stdout);
  for (int i=0; i<C && i<16; ++i) {
    printf("%2d", i);
    for (int j=0; j<n; ++j)
      printf("%4lu", wt[i][j]/10);
    printf("\n");
    fflush(stdout);
  } */
}

// A MultiMixer averages the output of 2 mixers using different contexts
class MultiMixer {
  enum {MIX2=1};  // Use 2 mixers
  Mixer m1, m2;
public:
  MultiMixer(): m1(8), m2(16) {}
  void write(int n0, int n1) {
    m1.write(n0, n1);
    if (MIX2)
      m2.write(n0, n1);
  }
  void add(int n0, int n1) {
    if (MIX2) {
      m1.add(n0, n1);
      m2.add(n0, n1);
    }
    else
      m1.add(n0, n1);
  }
  int predict() {
    U32 p1=m1.predict(ch(1) >> 5);
    if (MIX2) {
      U32 p2=m2.predict(ch(1)/32%4+ch(2)/32%4*4);
      return (p1+p2)>>1;
    }
    else
      return p1;
  }
  void update(int y) {
    m1.update(y);
      if (MIX2)
    m2.update(y);
  }
  U32 getC() const {return 256;}
  U32 getN() const {return m1.getN();}
};

MultiMixer mixer;

//////////////////////////// CounterMap ////////////////////////////

/* CounterMap maintains a model and one context

  CounterMap cm(N); -- Create, size 2^N bytes
  cm.update(h);     -- Update model, then set next context hash to h
  cm.write();       -- Predict next bit and write counts to mixer
  cm.add();         -- Predict and add to previously written counts

There should be 8 calls to either write() or add() between each update(h).
h is a 32-bit hash of the context which should be set after a whole number
of bytes are read. */

// Stores only the most recent byte and its count per context (run length)
// in a hash table without collision detection
class CounterMap1 {
  const int N;
  struct S {
    U8 c;  // char
    U8 n;  // count
  };
  S* t;  // cxt -> c repeated last n times
  U32 cxt[MODELS];
public:
  CounterMap1(int n): N(n>1?n-1:1) {
    memset(cxt, 0, MODELS*4);
    assert(sizeof(S)==2);
    t=(S*)calloc(1<<N, 2);
    if (!t)
      handler();
  }
  void update(U32 h, U32 sel) {
    if (ch.bpos()==0) {
      if (t[cxt[sel]].n==0) {
        t[cxt[sel]].n=1;
        t[cxt[sel]].c=ch(1);
      }
      else if (U32(t[cxt[sel]].c)==ch(1)) {
        if (t[cxt[sel]].n<255)
          ++t[cxt[sel]].n;
      }
      else {
        t[cxt[sel]].c=ch(1);
        t[cxt[sel]].n=1;
      }
    }
    cxt[sel] = h >> 32-N;
  }
  void add(U32 sel) {
    if ((U32)((t[cxt[sel]].c+256) >> 8-ch.bpos())==ch()) {
      if ((t[cxt[sel]].c >> 7-ch.bpos()) & 1)
        mixer.add(0, t[cxt[sel]].n);
      else
        mixer.add(t[cxt[sel]].n, 0);
    }
  }
  void write(U32 sel) {
    mixer.write(0, 0);
    add(sel);
  }
};


// Uses a nibble-oriented hash table of contexts (counter state).
// For each selector it keeps a context hash and, for the 8 bits of the
// current byte, the counters that were used for prediction so that
// update() can train them once the byte is complete.
class CounterMap2 {
  const U32 N2;  // The constructor argument n; handed on to ht2's constructor
  U32 cxt[MODELS];  // Major contexts
  Hashtable<Counter> ht2;  // Secondary hash table (declared after N2, which initializes it)
  Counter* cp[MODELS][8];  // Pointers into ht2 or 0 if not used
public:
  CounterMap2(int n);  // Use 2^n bytes memory
  void add(U32 sel);  // Predict the next bit and add the counts to the mixer
  void update(U32 h, U32 sel);  // Train on the last byte, then set context hash h
  // Start a fresh mixer input, then add this model's counts to it
  void write(U32 sel) {
    mixer.write(0, 0);
    add(sel);
  }
};

// Build the hash table from n and clear all contexts and counter pointers
CounterMap2::CounterMap2(int n): N2(n), ht2(N2) {
  for (U32 sel=0; sel<MODELS; ++sel)
    cxt[sel]=0;
  for (U32 sel=0; sel<MODELS; ++sel) {
    Counter** const slots=cp[sel];
    for (int bit=0; bit<8; ++bit)
      slots[bit]=0;
  }
}

// Predict the next bit for selector sel from the bits seen so far in ch().
// After 4 bits of the byte the context is rehashed with ch.hi() and a new
// hash table element is selected.  The counter used is remembered in cp
// so that update() can train it.
void CounterMap2::add(U32 sel) {
  assert(sel<MODELS);
  const U32 bit=ch.bpos();
  if (bit==4) {
    U32& major=cxt[sel];
    major^=hash(ch.hi(), major, sel);
    ht2.set(major, sel);
  }
  Counter* const counter=&ht2(ch.lo(), sel);
  cp[sel][bit]=counter;
  mixer.add(counter->get0(), counter->get1());
}

// Called after the 8 predictions of a byte: train every counter that was
// remembered for selector sel with the matching bit of the last input
// char, ch(1) (most significant bit first), forget the pointers, then make
// h the new context hash and select its hash table element.
void CounterMap2::update(U32 h, U32 sel) {
  assert(sel<MODELS);
  const U32 lastChar=ch(1);
  Counter** const slots=cp[sel];

  // Update the secondary context
  for (int bit=0; bit<8; ++bit) {
    Counter* const counter=slots[bit];
    if (!counter)
      continue;
    counter->add((lastChar>>(7-bit))&1);
    slots[bit]=0;
  }
  cxt[sel]=h;
  ht2.set(h, sel);
}

// Combination of CounterMap1 and CounterMap2 behind one interface.  The
// CounterMap2 part always runs; the CounterMap1 part (given n-2 as its
// size argument) adds to the same mixer input when CM1 is nonzero.
class CounterMap3 {
  enum {CM1=1};  // Use cm1
  CounterMap1 cm1;
  CounterMap2 cm2;

  // Add the run-length model's counts to the last mixer input, if enabled
  void addRun(U32 sel) {
    if (CM1)
      cm1.add(sel);
  }
public:
  CounterMap3(int n): cm1(CM1 ? n-2 : 0), cm2(n) {}

  // Train both parts on the last byte and set the new context hash h
  void update(U32 h, U32 sel=0) {
    if (CM1)
      cm1.update(h, sel);
    cm2.update(h, sel);
  }

  // Open a new mixer input and fill it from both parts
  void write(U32 sel=0) {
    cm2.write(sel);
    addRun(sel);
  }

  // Add the counts of both parts to the previous mixer input
  void add(U32 sel=0) {
    cm2.add(sel);
    addRun(sel);
  }
};

#define CounterMap CounterMap3

//////////////////////////// Model ////////////////////////////

// Abstract base of all models.  A concrete model implements model(), which
// updates the model with the last bit of input (in ch) and then writes
// its probabilities for the following bit into mixer.
class Model {
public:
  virtual ~Model() {}
  virtual void model() = 0;
};

//////////////////////////// allModel ////////////////////////////

// Context model over the preceding bytes.  Selectors 0..NC-1 use hashes
// that chain in one more byte per order (selector 0 is always hash 0),
// selector NC uses a hash of ch(1) and ch(3), and a constant (1, 1) input
// serves as the default prediction.

class AllModel: public Model {
  enum {NC=8};
  U32 cxt[NC];  // Order 0-7 contexts
  CounterMap t;
public:
  AllModel(): t(26) {
    for (int i=0; i<NC; ++i)
      cxt[i]=0;
  }
  void model() {
    if (ch.bpos()==0) {

      // Update character contexts, highest order first so that each one
      // is built from the not yet updated next lower order
      t.update(0, 0);
      int order=NC;
      while (--order>0) {
        cxt[order]=cxt[order-1]^hash(ch(1), order);
        t.update(cxt[order], order);
      }
      t.update(hash(ch(1), ch(3), NC), NC);
    }
    mixer.write(1, 1);  // Default model
    for (int sel=0; sel<=NC; ++sel)
      t.write(sel);
  }
} allModel;

//////////////////////////// matchModel ////////////////////////////

/* A MatchModel looks for a match of length n >= 8 bytes between
the current context and previous input, and predicts the next bit
in the previous context with weight n.  If the next bit is 1, then
the mixer is assigned (0, n), else (n, 0).  Matches are found using
an index (a hash table of pointers into ch). */

// Match model state: two rolling hashes of the recent input, up to M
// concurrently tracked matches (begin/end pairs) and a table that maps a
// hash to a position stored by model().
class MatchModel: public Model {
  const int N;      // 2^N = hash table size
  enum {M=4};       // Number of strings to match
  U32 hash[2];      // Hashes of current context up to pos-1
  U32 begin[M];     // Points to first matching byte
  U32 end[M];       // Points to last matching byte + 1, 0 if no match
  U32 *ptr;         // Hash table of pointers [2^N]
public:
  // N must be initialized before ptr (declaration order above) because the
  // table size is taken from it.  The table and all match state start at 0.
  MatchModel(): N(22), ptr(new U32[1 << N]) {
    memset(ptr, 0, (1 << N)*sizeof(U32));                             
    hash[0]=hash[1]=0;
    for (int i=0; i<M; ++i)
      begin[i]=end[i]=0;
  }
  void model();
} matchModel;

// Update the match state with the last input and write one (n0, n1) pair
// to the mixer: the weights of all active matches, summed separately for
// matches whose next bit is 0 and for those whose next bit is 1.
inline void MatchModel::model() {
  if (ch.bpos()==0) {  // New byte
    hash[0]=hash[0]*(16*56797157)+ch(1)+1;  // Hash last 8 bytes
    hash[1]=hash[1]*(2*45684217)+ch(1)+1;   // Hash last 32 bytes
    U32 h=hash[0] >> (32-N);
    if ((hash[0]>>28)==0)
      h=hash[1] >> (32-N);  // 1/16 of 8-contexts are hashed to 32 bytes   

    // Advance every active match whose next byte equals the byte just read
    for (int i=0; i<M; ++i) {
      if (end[i] && ch(1)==ch[end[i]])
        ++end[i];
    }

    // Fill at most one free slot (the first with end[i]==0) from ptr[h]
    for (int i=0; i<M; ++i) {
      if (!end[i]) { // Search for a matching context
        int j;
        for (j=0; j<M; ++j)  // Search for duplicate match
          if (ptr[h]==end[j])
            break;
        // Also taken when ptr[h] is 0, since the free slot itself has end 0
        if (j!=M)  // Context already matched?
          break;
        end[i]=ptr[h];
        if (end[i]>0) {
          // Walk backwards from the candidate and from the current
          // position while the preceding bytes agree
          begin[i]=end[i];
          U32 p=ch.pos();
          while (begin[i]>0 && p>0 && begin[i]!=p+1
              && ch[begin[i]-1]==ch[p-1]) {
            --begin[i];
            --p;
          }
        }
        if (end[i]==begin[i])  // No match found
          begin[i]=end[i]=0;
        break;
      }
    }
    ptr[h]=ch.pos();  // remember the current position under this hash
  }

  // Test whether the current context is valid in the last 0-7 bits
  for (int i=0; i<M; ++i) {
    if (end[i] && ((ch[end[i]]+256) >> (8-ch.bpos())) != ch())
      begin[i]=end[i]=0;
  }

  // Predict the bit found in the matching contexts
  int n0=0, n1=0;
  for (int i=0; i<M; ++i) {
    if (end[i]) { 
      // Weight is (match length)^2/4, capped at 511.
      // NOTE(review): wt*wt is a 32-bit product, so a length of 65536 or
      // more would wrap before the cap applies - confirm whether such
      // lengths can occur.
      U32 wt=(end[i]-begin[i]);
      wt=wt*wt/4;
      if (wt>511)
        wt=511;
      int y=(ch[end[i]]>>(7-ch.bpos()))&1;  // next bit of the matched byte
      if (y)
        n1+=wt;
      else
        n0+=wt;
    }
  }
  mixer.write(n0, n1);
}

//////////////////////////// wordModel ////////////////////////////

// A WordModel keeps two word histories of N hashes each.  cxt[] treats any
// run of characters > 32 as a word, ended by a character <= 32.  word[]
// treats any run of letters (isalpha, or code >= 192) as a word, hashed in
// lower case.  Six contexts are fed to the counter map: for each history
// the current word alone, with the previous word, and with the word
// before that (skipping one).

class WordModel: public Model {
  enum {N=3};
  CounterMap t;
  U32 cxt[N];   // Hashes of last N words broken on whitespace
  U32 word[N];  // Hashes of last N words of letters only, lower case
public:
  WordModel(): t(26) {
    for (int i=0; i<N; ++i) {
      cxt[i]=0;
      word[i]=0;
    }
  }
  void model() {
    if (ch.bpos()==0) {
      const int c=ch(1);

      // Whitespace-delimited history: extend the current word, or push it
      // down when a non-empty word has just ended
      if (c>32)
        cxt[0]^=hash(cxt[0], c);
      else if (cxt[0]!=0) {
        for (int i=N-1; i>0; --i)
          cxt[i]=cxt[i-1];
        cxt[0]=0;
      }

      // Letter-only history: extend with the lower case letter, otherwise
      // push the history down unconditionally
      if (isalpha(c) || c>=192)
        word[0]^=hash(word[0], tolower(c), 1);
      else {
        for (int i=N-1; i>0; --i)
          word[i]=word[i-1];
        word[0]=0;
      }

      // The sums bind tighter than ^, as in hash ^ (a + b)
      t.update(hash((1), 1)^cxt[0], 0);
      t.update(hash((1), 2)^(cxt[1]+cxt[0]), 1);
      t.update(hash((1), 3)^(cxt[2]+cxt[0]), 2);
      t.update(hash(ch(1), 4)^word[0], 3);
      t.update(hash(ch(1), 5)^(word[1]+word[0]), 4);
      t.update(hash(ch(1), 6)^(word[2]+word[0]), 5);
    }
    for (int sel=0; sel<6; ++sel)
      t.write(sel);
  }
} wordModel;

//////////////////////////// TextModel ////////////////////////////

// Model text (-1 option)
// Runs the three sub-models in a fixed order; each one writes its inputs
// to the shared mixer, so the order determines the mixer input positions.

class TextModel: public Model {
public:
  void model() {
    allModel.model();
    wordModel.model();
    matchModel.model();
  }
} textModel;

//////////////////////////// picModel ////////////////////////////

class PicModel: public Model {
  CounterMap t;
public:
  PicModel(): t(22) {}
  void model() {
    if (ch.bpos()==0) {
      t.update(hash(ch(216), ch(432), 0), 0);
      t.update(hash(ch(216), ch(1), 1), 1);
      t.update(hash(ch(216), ch(432), ch(648), 2), 2);
      t.update(hash(ch(216), ch(432), ch(1), 3), 3);
      t.update(hash(ch(216), ch(217), ch(1), 4), 4);
      t.update(hash(ch(216), 5), 5);
      t.update(hash(ch(1), 6), 6);
      t.update(hash(ch(1), ch(2), 7), 7);
      t.update(hash(ch(216), ch(215), ch(1), 8), 8);
    }
    for (int i=0; i<9; ++i) {
      t.write(i);
    }
    mixer.write(1, 1);
  }
} picModel;

//////////////////////////// ObjModel ////////////////////////////

// Model for the -3 option.  Runs allModel and matchModel, then adds one
// context for every pair of bytes ch(a), ch(b) with 1 <= a < 5 and
// a < b < 9, preceded by a constant (1, 1) input.
class ObjModel: public Model {
  CounterMap t;
  int k;  // number of pair contexts set by the last byte-boundary update
public:
  ObjModel(): t(22), k(0) {}
  void model() {
    allModel.model();
    matchModel.model();
    if (ch.bpos()==0) {
      int sel=0;
      for (int a=1; a<5; ++a)
        for (int b=a+1; b<9; ++b, ++sel)
          t.update(hash(ch(a), ch(b), sel), sel);
      k=sel;
    }
    mixer.write(1, 1);
    for (int sel=0; sel<k; ++sel)
      t.write(sel);
  }
} objModel;


//////////////////////////// Predictor ////////////////////////////

/* A Predictor adjusts the model probability using SSE and passes it
to the encoder.  An SSE model is a table of counters, sse[SSE1][SSE2]
which maps a context and a probability into a new, more accurate
probability.  The context, SSE1, consists of the 0-7 bits of the current
byte and the 2 leading bits of the previous byte.  The probability
to be mapped, SSE2 is first stretched near 0 and 1 using SSEMap, then
quantized into SSE2=32 intervals.  Each SSE element is a pair of 0
and 1 counters of the bits seen so far in the current context and
probability range.  Both the bin below and above the current probability
is updated by adding 1 to the appropriate count (n0 or n1).  The
output probability for an SSE element is n1/(n0+n1) interpolated between
the bins below and above the input probability.  This is averaged
with the original probability with 25% weight to give the final
probability passed to the encoder.

NOTE(review): SSE1 is 256*4*2, and update() folds one further bit,
ch.pos(0,3)<ch.pos(32,3), into the context in addition to the bits
described above - confirm its meaning against the definition of ch. */

class Predictor {
  enum {SSE1=256*4*2, SSE2=32,  // SSE dimensions (contexts, probability bins)
    SSESCALE=1024/SSE2};      // Number of mapped probabilities between bins

  // Scale probability p into a context in the range 0 to 1K-1 by
  // stretching the ends of the range.
  class SSEMap {
    U16 table[PSCALE];  // filled by the constructor, one entry per p
  public:
    int operator()(int p) const {return table[p];}  // p must be 0..PSCALE-1
    SSEMap();
  } ssemap;  // functoid

  // Secondary source encoder element
  struct SSEContext {
    U8 c1, n;  // Count of 1's, count of bits
    // Estimated P(1) scaled by PSCALE, computed as (c1+1/64)/(n+1/32)
    int p() const {return PSCALE*(c1*64+1)/(n*64+2);}
    // Count bit y; both counts are halved once n exceeds 254
    void update(int y) {
      if (y)
        ++c1;
      if (++n>254) {  // Roll over count overflows
        c1/=2;
        n/=2;
      }
    }
    SSEContext(): c1(0), n(0) {}
  };

  SSEContext (*sse)[SSE2+1];  // [SSE1][SSE2+1] context, mapped probability
  U32 nextp;   // p()
  U32 ssep;    // Output of sse
  U32 context; // SSE context
public:
  Predictor();
  int p() const {return nextp;}  // Returns pr(y = 1) * PSCALE
  void update(int y);  // Update model with bit y = 0 or 1
};

// Fill the stretch table: entry i is 64*ln((i+0.5)/(PSCALE-0.5-i))+512,
// truncated to int and clamped to 0..1023
Predictor::SSEMap::SSEMap() {
  for (int i=0; i<PSCALE; ++i) {
    int stretched=int(64*log((i+0.5)/(PSCALE-0.5-i))+512);
    if (stretched>1023)
      stretched=1023;
    else if (stretched<0)
      stretched=0;
    table[i]=stretched;
  }
}

// Initialize ch and allocate the SSE table, presetting every context so
// that each probability bin starts out reproducing its own input
// probability: sse[context][ssemap(p)] = p.
Predictor::Predictor(): sse(0), nextp(PSCALE/2), ssep(512), context(0) {
  ch.init();

  sse=(SSEContext(*)[SSE2+1]) new SSEContext[SSE1][SSE2+1];
  const int scale=PSCALE;
  int upper=SSE2+1;  // one past the highest bin not yet filled

  // Walk the probabilities from high to low.  Whenever the mapped bin
  // drops, fill the bins from upper-1 down to it in all contexts.
  for (int prob=scale-1; prob>=0; --prob) {
    const int bin=(ssemap(prob*PSCALE/scale)+SSESCALE/2)/SSESCALE;
    int count=1+scale*scale/((prob+1)*(scale-prob));
    if (count>254)
      count=254;
    const int ones=(prob*count+scale/2)/scale;
    for (int k=0; k<SSE1; ++k) {
      for (int j=upper-1; j>=bin; --j) {
        sse[k][j].n=count;
        sse[k][j].c1=ones;
      }
    }
    upper=bin;
  }
}

inline void Predictor::update(int y) {

  // Update the bins below and above the last input probability, ssep
  sse[context][ssep/SSESCALE].update(y);
  sse[context][ssep/SSESCALE+1].update(y);

  // Adjust model mixing weights
  mixer.update(y);

  // Update individual models
  ch.update(y);
  if (MEM==1) textModel.model();
  if (MEM==2) picModel.model();
  if (MEM==3) objModel.model();

  // Combine probabilities
  nextp=mixer.predict();

  // Get final probability, interpolate SSE and average with original
  context=(ch(0)*4+ch(1)/64)*2+(ch.pos(0,3)<ch.pos(32,3));  // for SSE
  ssep=ssemap(nextp);
  U32 wt=ssep%SSESCALE;
  U32 i=ssep/SSESCALE;
  nextp=(((sse[context][i].p()*(SSESCALE-wt)+sse[context][i+1].p()*wt)
    /SSESCALE)*3+nextp)/4;
}

//////////////////////////// Encoder ////////////////////////////

/* An Encoder does arithmetic encoding.  Methods:
   Encoder(COMPRESS, f) creates encoder for compression to archive f, which
     must be open past the header for writing in binary mode
   Encoder(DECOMPRESS, f) creates encoder for decompression from archive f,
     which must be open past the header for reading in binary mode
   encode(bit) in COMPRESS mode compresses bit to file f.
   decode() in DECOMPRESS mode returns the next decompressed bit from file f.
   flush() should be called when there is no more to compress
*/

// Direction in which an Encoder (and Transformer) operates
typedef enum {COMPRESS, DECOMPRESS} Mode;

// Binary arithmetic coder driven by a Predictor.  The same object either
// writes to or reads from the archive, depending on the mode it was
// constructed with.
class Encoder {
private:
  Predictor predictor;   // Supplies P(1) for each bit and learns from it
  const Mode mode;       // Compress or decompress?
  FILE* archive;         // Compressed data file
  U32 x1, x2;            // Range, initially [0, 1), scaled by 2^32
  U32 x;                 // Last 4 input bytes of archive.
public:
  Encoder(Mode m, FILE* f);
  void encode(int y);    // Compress bit y
  int decode();          // Uncompress and return bit y
  void flush();          // Call when done compressing
};

// Constructor.  Starts with the full range [0, 0xffffffff].  When
// decompressing, x is preloaded with the first 4 bytes of the archive,
// most significant byte first; bytes past the end of file read as 0.
Encoder::Encoder(Mode m, FILE* f): predictor(), mode(m), archive(f), x1(0),
                                   x2(0xffffffff), x(0) {
  if (mode!=DECOMPRESS)
    return;
  for (int remaining=4; remaining>0; --remaining) {
    int c=getc(archive);
    if (c==EOF)
      c=0;
    x=(x<<8)+(c&0xff);
  }
}

/* encode(y) -- Encode bit y by splitting the range [x1, x2] in proportion
to P(1) and P(0) as given by the predictor and narrowing to the appropriate
subrange.  Output leading bytes of the range as they become known. */

inline void Encoder::encode(int y) {

  // Split the range
  const U32 p=predictor.p()*(4096/PSCALE)+2048/PSCALE; // P(1) * 4K
  assert(p<4096);
  const U32 xdiff=x2-x1;
  U32 xmid=x1;  // = x1+p*(x2-x1) multiply without overflow, round down
  if (xdiff>=0x4000000) xmid+=(xdiff>>12)*p;
  else if (xdiff>=0x100000) xmid+=((xdiff>>6)*p)>>6;
  else xmid+=(xdiff*p)>>12;

  // Update the range
  if (y)
    x2=xmid;
  else
    x1=xmid+1;
  predictor.update(y);

  // Shift equal MSB's out
  while (((x1^x2)&0xff000000)==0) {
    putc(x2>>24, archive);
    x1<<=8;
    x2=(x2<<8)+255;
  }
}

/* Decode one bit from the archive, splitting [x1, x2] as in the encoder
and returning 1 or 0 depending on which subrange the archive point x is in.
*/
inline int Encoder::decode() {

  // Split the range
  const U32 p=predictor.p()*(4096/PSCALE)+2048/PSCALE; // P(1) * 4K
  assert(p<4096);
  const U32 xdiff=x2-x1;
  U32 xmid=x1;  // = x1+p*(x2-x1) multiply without overflow, round down
  if (xdiff>=0x4000000) xmid+=(xdiff>>12)*p;
  else if (xdiff>=0x100000) xmid+=((xdiff>>6)*p)>>6;
  else xmid+=(xdiff*p)>>12;

  // Update the range
  int y=0;
  if (x<=xmid) {
    y=1;
    x2=xmid;
  }
  else
    x1=xmid+1;
  predictor.update(y);

  // Shift equal MSB's out
  while (((x1^x2)&0xff000000)==0) {
    x1<<=8;
    x2=(x2<<8)+255;
    int c=getc(archive);
    if (c==EOF) c=0;
    x=(x<<8)+c;
  }
  return y;
}

// Finish compression.  Has no effect in DECOMPRESS mode.  In COMPRESS mode
// any leading bytes shared by x1 and x2 are written, followed by the
// leading byte of x2, the first byte on which the two ends differ.
void Encoder::flush() {
  if (mode!=COMPRESS)
    return;
  while (((x1^x2)&0xff000000)==0) {
    putc(x2>>24, archive);
    x1<<=8;
    x2=(x2<<8)+255;
  }
  putc(x2>>24, archive);  // First unequal byte
}

//////////////////////////// Transformer ////////////////////////////

/* A transformer compresses 1 byte at a time.  It also provides a
   place to insert transforms or filters in the future.

  Transformer tf(COMPRESS, f) -- Initialize for compression to archive f
    which must be open in "wb" mode with the header already written
  Transformer tf(DECOMPRESS, f) -- Initialize for decompression from f which
    must be open in "rb" mode past the header
  tf.encode(c) -- Compress byte c
  c = tf.decode() -- Decompress byte c
  tf.flush() -- Should be called when compression is finished
*/

// Byte-level wrapper around an Encoder: a byte is coded as its 8 bits,
// most significant bit first.
class Transformer {
  Encoder e;
public:
  Transformer(Mode mode, FILE* f): e(mode, f) {}

  // Compress the low 8 bits of c
  void encode(int c) {
    int shift=8;
    while (shift-->0)
      e.encode((c>>shift)&1);
  }

  // Decompress and return one byte
  U32 decode() {
    U32 c=0;
    for (int bits=8; bits>0; --bits)
      c=(c<<1)|e.decode();
    return c;
  }

  // Finish compression
  void flush() {
    e.flush();
  }
};

//////////////////////////// main ////////////////////////////

// Read and return a line of input from FILE f (default stdin) up to the
// first control character except tab, or EOF.  The terminating character
// is consumed and is not part of the result.  If the terminator is CR and
// it is followed by LF, the LF is consumed as well; any other character
// after a CR is pushed back so that it starts the next line instead of
// being silently dropped.
std::string getline(FILE* f=stdin) {
  std::string result;
  int c;
  while ((c=getc(f))!=EOF && (c>=32 || c=='\t'))
    result+=char(c);
  if (c=='\r') {
    const int next=getc(f);
    if (next!=EOF && next!='\n')
      ungetc(next, f);  // not a CR LF pair: keep the character
  }
  return result;
}

// Print the command line summary shown when no archive name is given
static void printUsage() {
  printf(
    PROGNAME " file compressor/archiver, (C) 2004, Matt Mahoney, mmahoney@cs.fit.edu\n"
    "This program is free software distributed without warranty under the terms\n"
    "of the GNU General Public License, see http://www.gnu.org/licenses/gpl.txt\n"
    "\n"
    "To compress:         " PROGNAME " -1 archive filenames...  (archive will be created)\n"
    "  or (MSDOS):        dir/b | " PROGNAME " -1 archive  (reads file names from input)\n"
    "To extract/compare:  " PROGNAME " archive  (does not clobber existing files)\n"
    "To view contents:    more < archive\n"
    "\n"
    "Compression option: -1 for text (default), -2 for CCITT images (pic)\n"
    "or -3 for other binary files (geo, obj1, obj2)\n");
}

// User interface.  With an existing archive as the only file argument the
// archive is extracted (files that already exist are compared instead of
// overwritten).  Otherwise a new archive is created from the files named
// on the command line or, if none are given, read from standard input.
// Returns 0 on success and 1 on a usage or file error.
int main(int argc, char** argv) {

  // Check arguments
  if (argc<2) {
    printUsage();
    return 1;
  }

  // Read and remove -MEM option
  if (argv[1][0]=='-') {
    // isdigit requires a value representable as unsigned char
    if (isdigit((unsigned char)argv[1][1]) && argv[1][2]==0) {
      MEM=argv[1][1]-'0';
    }
    else
      printf("Option %s ignored\n", argv[1]);
    argc--;
    argv++;
  }
  if (MEM<1 || MEM>3) MEM=1;

  // The option may have been the only argument, in which case argv[1] is
  // now the terminating null pointer and must not be passed to fopen
  if (argc<2) {
    printUsage();
    return 1;
  }

  // File names and sizes from input or archive
  vector<string> filename; // List of names
  vector<long> filesize;   // Size or -1 if error
  int uncompressed_bytes=0, compressed_bytes=0;  // Input, output sizes

  // Extract files
  FILE* archive=fopen(argv[1], "rb");
  if (archive) {
    if (argc>2) {
      printf("File %s already exists\n", argv[1]);
      fclose(archive);
      return 1;
    }

    // Read PROGNAME " -m\r\n" at start of archive
    string s=getline(archive);
    if (s.substr(0, string(PROGNAME).size()) != PROGNAME) {
      printf("Archive file %s not in " PROGNAME " format\n", argv[1]);
      fclose(archive);
      return 1;
    }

    // Get option -m where m is a digit
    if (s.size()>2 && s[s.size()-2]=='-') {
      int c=s[s.size()-1];
      if (c>='0' && c<='9')
        MEM=c-'0';
    }
    printf("Extracting archive " PROGNAME " -%d %s ...\n", MEM, argv[1]);

    // Read "size filename" in "%d\t%s\r\n" format
    while (true) {
      string s=getline(archive);
      if (s.size()>1) {
        filesize.push_back(atol(s.c_str()));
        string::iterator tab=find(s.begin(), s.end(), '\t');
        if (tab!=s.end())
          filename.push_back(string(tab+1, s.end()));
        else
          filename.push_back("");
      }
      else
        break;
    }

    // Test end of header for "\f\0"
    {
      int c1=0, c2=0;
      if ((c1=getc(archive))!='\f' || (c2=getc(archive))!=0) {
        printf("%s: Bad " PROGNAME " header format %d %d\n", argv[1],
          c1, c2);
        fclose(archive);
        return 1;
      }
    }

    // Extract files from archive data.  Every byte is decoded even when it
    // cannot be stored, because the decoder state must advance through
    // the whole stream.
    // NOTE(review): names are taken from the archive header as is and are
    // passed to fopen unchanged; an untrusted archive can therefore name
    // a path outside the current directory.
    Transformer e(DECOMPRESS, archive);
    for (int i=0; i<int(filename.size()); ++i) {
      printf("%10ld %s: ", filesize[i], filename[i].c_str());

      // Compare with existing file
      FILE* f=fopen(filename[i].c_str(), "rb");
      const long size=filesize[i];
      uncompressed_bytes+=size;
      if (f) {
        bool different=false;
        for (long j=0; j<size; ++j) {
          int c1=e.decode();
          int c2=getc(f);
          if (!different && c1!=c2) {
            printf("differ at offset %ld, archive=%d file=%d\n",
              j, c1, c2);
            different=true;
          }
        }
        if (!different)
          printf("identical\n");
        fclose(f);
      }

      // Extract to new file
      else {
        f=fopen(filename[i].c_str(), "wb");
        if (!f)
          printf("cannot create, skipping...\n");
        for (long j=0; j<size; ++j) {
          int c=e.decode();
          if (f)
            putc(c, f);
        }
        if (f) {
          printf("extracted\n");
          fclose(f);
        }
      }
    }
    compressed_bytes=ftell(archive);
    fclose(archive);
  }

  // Compress files
  else {

    // Read file names from command line or input
    if (argc>2)
      for (int i=2; i<argc; ++i)
        filename.push_back(argv[i]);
    else {
      printf(
        "Enter names of files to compress, followed by blank line or EOF.\n");
      while (true) {
        string s=getline(stdin);
        if (s=="")
          break;
        else
          filename.push_back(s);
      }
    }

    // Get file sizes
    for (int i=0; i<int(filename.size()); ++i) {
      FILE* f=fopen(filename[i].c_str(), "rb");
      if (!f) {
        printf("File not found, skipping: %s\n",
          filename[i].c_str());
        filesize.push_back(-1);
      }
      else {
        fseek(f, 0L, SEEK_END);
        filesize.push_back(ftell(f));
        fclose(f);
      }
    }
    if (filesize.empty() || *max_element(filesize.begin(), filesize.end())<0){
      printf("No files to compress, no archive created.\n");
      return 1;
    }

    // Write header
    archive=fopen(argv[1], "wb");
    if (!archive) {
      printf("Cannot create archive: %s\n", argv[1]);
      return 1;
    }
    fprintf(archive, PROGNAME " -%d\r\n", MEM);
    for (int i=0; i<int(filename.size()); ++i) {
      if (filesize[i]>=0)
        fprintf(archive, "%ld\t%s\r\n", filesize[i], filename[i].c_str());
    }
    putc(032, archive);  // MSDOS EOF
    putc('\f', archive);
    putc(0, archive);

    // Write data.  Exactly the recorded number of bytes is coded for each
    // file; if a file can no longer be opened, zero bytes are coded in
    // its place so that the data still matches the header.
    Transformer e(COMPRESS, archive);
    long file_start=ftell(archive);
    for (int i=0; i<int(filename.size()); ++i) {
      const long size=filesize[i];
      if (size>=0) {
        uncompressed_bytes+=size;
        printf("%-23s %10ld -> ", filename[i].c_str(), size);
        FILE* f=fopen(filename[i].c_str(), "rb");
        int c;
        for (long j=0; j<size; ++j) {
          if (f)
            c=getc(f);
          else
            c=0;
          e.encode(c);
        }
        if (f)
          fclose(f);
        printf("%ld\n", ftell(archive)-file_start);
        file_start=ftell(archive);
      }
    }
    e.flush();
    compressed_bytes=ftell(archive);
    fclose(archive);
  }

  // Report statistics
  const double elapsed_time =
    double(clock()-programChecker.start_time())/CLOCKS_PER_SEC;
  printf("%d/%d in %1.2f sec.", compressed_bytes, uncompressed_bytes,
    elapsed_time);
  if (uncompressed_bytes>0 && elapsed_time>0) {
    printf(" (%1.4f bpc, %1.2f%% at %1.0f KB/s)",
      compressed_bytes*8.0/uncompressed_bytes,
      compressed_bytes*100.0/uncompressed_bytes,
      uncompressed_bytes/(elapsed_time*1000.0));
  }
  printf("\n");
  return 0;
}