| 1 | // BZip2OutputStream.cs
|
|---|
| 2 | //
|
|---|
| 3 | // Copyright (C) 2001 Mike Krueger
|
|---|
| 4 | //
|
|---|
| 5 | // This program is free software; you can redistribute it and/or
|
|---|
| 6 | // modify it under the terms of the GNU General Public License
|
|---|
| 7 | // as published by the Free Software Foundation; either version 2
|
|---|
| 8 | // of the License, or (at your option) any later version.
|
|---|
| 9 | //
|
|---|
| 10 | // This program is distributed in the hope that it will be useful,
|
|---|
| 11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|---|
| 12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|---|
| 13 | // GNU General Public License for more details.
|
|---|
| 14 | //
|
|---|
| 15 | // You should have received a copy of the GNU General Public License
|
|---|
| 16 | // along with this program; if not, write to the Free Software
|
|---|
| 17 | // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
|---|
| 18 | //
|
|---|
| 19 | // Linking this library statically or dynamically with other modules is
|
|---|
| 20 | // making a combined work based on this library. Thus, the terms and
|
|---|
| 21 | // conditions of the GNU General Public License cover the whole
|
|---|
| 22 | // combination.
|
|---|
| 23 | //
|
|---|
| 24 | // As a special exception, the copyright holders of this library give you
|
|---|
| 25 | // permission to link this library with independent modules to produce an
|
|---|
| 26 | // executable, regardless of the license terms of these independent
|
|---|
| 27 | // modules, and to copy and distribute the resulting executable under
|
|---|
| 28 | // terms of your choice, provided that you also meet, for each linked
|
|---|
| 29 | // independent module, the terms and conditions of the license of that
|
|---|
| 30 | // module. An independent module is a module which is not derived from
|
|---|
| 31 | // or based on this library. If you modify this library, you may extend
|
|---|
| 32 | // this exception to your version of the library, but you are not
|
|---|
| 33 | // obligated to do so. If you do not wish to do so, delete this
|
|---|
| 34 | // exception statement from your version.
|
|---|
| 35 |
|
|---|
| 36 | using System;
|
|---|
| 37 | using System.IO;
|
|---|
| 38 |
|
|---|
| 39 | using ICSharpCode.SharpZipLib.Checksums;
|
|---|
| 40 |
|
|---|
| 41 | namespace ICSharpCode.SharpZipLib.BZip2
|
|---|
| 42 | {
|
|---|
| 43 |
|
|---|
| 44 | // TODO: Update to BZip2 1.0.1, 1.0.2
|
|---|
| 45 |
|
|---|
| 46 | /// <summary>
|
|---|
| 47 | /// An output stream that compresses into the BZip2 format
|
|---|
| 48 | /// including file header chars into another stream.
|
|---|
| 49 | /// </summary>
|
|---|
| 50 | public class BZip2OutputStream : Stream
|
|---|
| 51 | {
|
|---|
| 52 | #region Constants
|
|---|
| 53 | const int SETMASK = (1 << 21);
|
|---|
| 54 | const int CLEARMASK = (~SETMASK);
|
|---|
| 55 | const int GREATER_ICOST = 15;
|
|---|
| 56 | const int LESSER_ICOST = 0;
|
|---|
| 57 | const int SMALL_THRESH = 20;
|
|---|
| 58 | const int DEPTH_THRESH = 10;
|
|---|
| 59 |
|
|---|
| 60 | /*--
|
|---|
| 61 | If you are ever unlucky/improbable enough
|
|---|
| 62 | to get a stack overflow whilst sorting,
|
|---|
| 63 | increase the following constant and try
|
|---|
| 64 | again. In practice I have never seen the
|
|---|
| 65 | stack go above 27 elems, so the following
|
|---|
| 66 | limit seems very generous.
|
|---|
| 67 | --*/
|
|---|
| 68 | const int QSORT_STACK_SIZE = 1000;
|
|---|
| 69 |
|
|---|
| 70 | /*--
|
|---|
| 71 | Knuth's increments seem to work better
|
|---|
| 72 | than Incerpi-Sedgewick here. Possibly
|
|---|
| 73 | because the number of elems to sort is
|
|---|
| 74 | usually small, typically <= 20.
|
|---|
| 75 | --*/
|
|---|
| 76 | readonly int[] increments = new int[] {
|
|---|
| 77 | 1, 4, 13, 40, 121, 364, 1093, 3280,
|
|---|
| 78 | 9841, 29524, 88573, 265720,
|
|---|
| 79 | 797161, 2391484
|
|---|
| 80 | };
|
|---|
| 81 | #endregion
|
|---|
| 82 |
|
|---|
| 83 | #region Constructors
|
|---|
| 84 | /// <summary>
|
|---|
| 85 | /// Construct a default output stream with maximum block size
|
|---|
| 86 | /// </summary>
|
|---|
| 87 | /// <param name="stream">The stream to write BZip data onto.</param>
|
|---|
| 88 | public BZip2OutputStream(Stream stream) : this(stream, 9)
|
|---|
| 89 | {
|
|---|
| 90 | }
|
|---|
| 91 |
|
|---|
| 92 | /// <summary>
|
|---|
| 93 | /// Initialise a new instance of the <see cref="BZip2OutputStream"></see>
|
|---|
| 94 | /// for the specified stream, using the given blocksize.
|
|---|
| 95 | /// </summary>
|
|---|
| 96 | /// <param name="stream">The stream to write compressed data to.</param>
|
|---|
| 97 | /// <param name="blockSize">The block size to use.</param>
|
|---|
| 98 | /// <remarks>
|
|---|
| 99 | /// Valid block sizes are in the range 1..9, with 1 giving
|
|---|
| 100 | /// the lowest compression and 9 the highest.
|
|---|
| 101 | /// </remarks>
|
|---|
| 102 | public BZip2OutputStream(Stream stream, int blockSize)
|
|---|
| 103 | {
|
|---|
| 104 | BsSetStream(stream);
|
|---|
| 105 |
|
|---|
| 106 | workFactor = 50;
|
|---|
| 107 | if (blockSize > 9) {
|
|---|
| 108 | blockSize = 9;
|
|---|
| 109 | }
|
|---|
| 110 |
|
|---|
| 111 | if (blockSize < 1) {
|
|---|
| 112 | blockSize = 1;
|
|---|
| 113 | }
|
|---|
| 114 | blockSize100k = blockSize;
|
|---|
| 115 | AllocateCompressStructures();
|
|---|
| 116 | Initialize();
|
|---|
| 117 | InitBlock();
|
|---|
| 118 | }
|
|---|
| 119 | #endregion
|
|---|
| 120 |
|
|---|
| 121 | #region Destructor
|
|---|
| 122 | /// <summary>
|
|---|
| 123 | /// Ensures that resources are freed and other cleanup operations
|
|---|
| 124 | /// are performed when the garbage collector reclaims the BZip2OutputStream.
|
|---|
| 125 | /// </summary>
|
|---|
| 126 | ~BZip2OutputStream()
|
|---|
| 127 | {
|
|---|
| 128 | Dispose(false);
|
|---|
| 129 | }
|
|---|
| 130 | #endregion
|
|---|
| 131 |
|
|---|
| 132 | /// <summary>
|
|---|
| 133 | /// Get/set flag indicating ownership of underlying stream.
|
|---|
| 134 | /// When the flag is true <see cref="Close"></see> will close the underlying stream also.
|
|---|
| 135 | /// </summary>
|
|---|
| 136 | public bool IsStreamOwner
|
|---|
| 137 | {
|
|---|
| 138 | get { return isStreamOwner; }
|
|---|
| 139 | set { isStreamOwner = value; }
|
|---|
| 140 | }
|
|---|
| 141 |
|
|---|
| 142 |
|
|---|
| 143 | #region Stream overrides
|
|---|
| 144 | /// <summary>
|
|---|
| 145 | /// Gets a value indicating whether the current stream supports reading
|
|---|
| 146 | /// </summary>
|
|---|
| 147 | public override bool CanRead
|
|---|
| 148 | {
|
|---|
| 149 | get {
|
|---|
| 150 | return false;
|
|---|
| 151 | }
|
|---|
| 152 | }
|
|---|
| 153 |
|
|---|
| 154 | /// <summary>
|
|---|
| 155 | /// Gets a value indicating whether the current stream supports seeking
|
|---|
| 156 | /// </summary>
|
|---|
| 157 | public override bool CanSeek {
|
|---|
| 158 | get {
|
|---|
| 159 | return false;
|
|---|
| 160 | }
|
|---|
| 161 | }
|
|---|
| 162 |
|
|---|
| 163 | /// <summary>
|
|---|
| 164 | /// Gets a value indicating whether the current stream supports writing
|
|---|
| 165 | /// </summary>
|
|---|
| 166 | public override bool CanWrite {
|
|---|
| 167 | get {
|
|---|
| 168 | return baseStream.CanWrite;
|
|---|
| 169 | }
|
|---|
| 170 | }
|
|---|
| 171 |
|
|---|
| 172 | /// <summary>
|
|---|
| 173 | /// Gets the length in bytes of the stream
|
|---|
| 174 | /// </summary>
|
|---|
| 175 | public override long Length {
|
|---|
| 176 | get {
|
|---|
| 177 | return baseStream.Length;
|
|---|
| 178 | }
|
|---|
| 179 | }
|
|---|
| 180 |
|
|---|
| 181 | /// <summary>
|
|---|
| 182 | /// Gets or sets the current position of this stream.
|
|---|
| 183 | /// </summary>
|
|---|
| 184 | public override long Position {
|
|---|
| 185 | get {
|
|---|
| 186 | return baseStream.Position;
|
|---|
| 187 | }
|
|---|
| 188 | set {
|
|---|
| 189 | throw new NotSupportedException("BZip2OutputStream position cannot be set");
|
|---|
| 190 | }
|
|---|
| 191 | }
|
|---|
| 192 |
|
|---|
| 193 | /// <summary>
|
|---|
| 194 | /// Sets the current position of this stream to the given value.
|
|---|
| 195 | /// </summary>
|
|---|
| 196 | /// <param name="offset">The point relative to the offset from which to being seeking.</param>
|
|---|
| 197 | /// <param name="origin">The reference point from which to begin seeking.</param>
|
|---|
| 198 | /// <returns>The new position in the stream.</returns>
|
|---|
| 199 | public override long Seek(long offset, SeekOrigin origin)
|
|---|
| 200 | {
|
|---|
| 201 | throw new NotSupportedException("BZip2OutputStream Seek not supported");
|
|---|
| 202 | }
|
|---|
| 203 |
|
|---|
| 204 | /// <summary>
|
|---|
| 205 | /// Sets the length of this stream to the given value.
|
|---|
| 206 | /// </summary>
|
|---|
| 207 | /// <param name="value">The new stream length.</param>
|
|---|
| 208 | public override void SetLength(long value)
|
|---|
| 209 | {
|
|---|
| 210 | throw new NotSupportedException("BZip2OutputStream SetLength not supported");
|
|---|
| 211 | }
|
|---|
| 212 |
|
|---|
| 213 | /// <summary>
|
|---|
| 214 | /// Read a byte from the stream advancing the position.
|
|---|
| 215 | /// </summary>
|
|---|
| 216 | /// <returns>The byte read cast to an int; -1 if end of stream.</returns>
|
|---|
| 217 | public override int ReadByte()
|
|---|
| 218 | {
|
|---|
| 219 | throw new NotSupportedException("BZip2OutputStream ReadByte not supported");
|
|---|
| 220 | }
|
|---|
| 221 |
|
|---|
| 222 | /// <summary>
|
|---|
| 223 | /// Read a block of bytes
|
|---|
| 224 | /// </summary>
|
|---|
| 225 | /// <param name="buffer">The buffer to read into.</param>
|
|---|
| 226 | /// <param name="offset">The offset in the buffer to start storing data at.</param>
|
|---|
| 227 | /// <param name="count">The maximum number of bytes to read.</param>
|
|---|
| 228 | /// <returns>The total number of bytes read. This might be less than the number of bytes
|
|---|
| 229 | /// requested if that number of bytes are not currently available, or zero
|
|---|
| 230 | /// if the end of the stream is reached.</returns>
|
|---|
| 231 | public override int Read(byte[] buffer, int offset, int count)
|
|---|
| 232 | {
|
|---|
| 233 | throw new NotSupportedException("BZip2OutputStream Read not supported");
|
|---|
| 234 | }
|
|---|
| 235 |
|
|---|
| 236 | /// <summary>
|
|---|
| 237 | /// Write a block of bytes to the stream
|
|---|
| 238 | /// </summary>
|
|---|
| 239 | /// <param name="buffer">The buffer containing data to write.</param>
|
|---|
| 240 | /// <param name="offset">The offset of the first byte to write.</param>
|
|---|
| 241 | /// <param name="count">The number of bytes to write.</param>
|
|---|
| 242 | public override void Write(byte[] buffer, int offset, int count)
|
|---|
| 243 | {
|
|---|
| 244 | if ( buffer == null ) {
|
|---|
| 245 | throw new ArgumentNullException("buffer");
|
|---|
| 246 | }
|
|---|
| 247 |
|
|---|
| 248 | if ( offset < 0 )
|
|---|
| 249 | {
|
|---|
| 250 | throw new ArgumentOutOfRangeException("offset");
|
|---|
| 251 | }
|
|---|
| 252 |
|
|---|
| 253 | if ( count < 0 )
|
|---|
| 254 | {
|
|---|
| 255 | throw new ArgumentOutOfRangeException("count");
|
|---|
| 256 | }
|
|---|
| 257 |
|
|---|
| 258 | if ( buffer.Length - offset < count )
|
|---|
| 259 | {
|
|---|
| 260 | throw new ArgumentException("Offset/count out of range");
|
|---|
| 261 | }
|
|---|
| 262 |
|
|---|
| 263 | for (int i = 0; i < count; ++i) {
|
|---|
| 264 | WriteByte(buffer[offset + i]);
|
|---|
| 265 | }
|
|---|
| 266 | }
|
|---|
| 267 |
|
|---|
| 268 | /// <summary>
|
|---|
| 269 | /// Write a byte to the stream.
|
|---|
| 270 | /// </summary>
|
|---|
| 271 | /// <param name="value">The byte to write to the stream.</param>
|
|---|
| 272 | public override void WriteByte(byte value)
|
|---|
| 273 | {
|
|---|
| 274 | int b = (256 + value) % 256;
|
|---|
| 275 | if (currentChar != -1) {
|
|---|
| 276 | if (currentChar == b) {
|
|---|
| 277 | runLength++;
|
|---|
| 278 | if (runLength > 254) {
|
|---|
| 279 | WriteRun();
|
|---|
| 280 | currentChar = -1;
|
|---|
| 281 | runLength = 0;
|
|---|
| 282 | }
|
|---|
| 283 | } else {
|
|---|
| 284 | WriteRun();
|
|---|
| 285 | runLength = 1;
|
|---|
| 286 | currentChar = b;
|
|---|
| 287 | }
|
|---|
| 288 | } else {
|
|---|
| 289 | currentChar = b;
|
|---|
| 290 | runLength++;
|
|---|
| 291 | }
|
|---|
| 292 | }
|
|---|
| 293 |
|
|---|
| 294 | /// <summary>
|
|---|
| 295 | /// End the current block and end compression.
|
|---|
| 296 | /// Close the stream and free any resources
|
|---|
| 297 | /// </summary>
|
|---|
| 298 | public override void Close()
|
|---|
| 299 | {
|
|---|
| 300 | Dispose(true);
|
|---|
| 301 | GC.SuppressFinalize(this);
|
|---|
| 302 | }
|
|---|
| 303 |
|
|---|
| 304 | #endregion
|
|---|
| 305 | void MakeMaps()
|
|---|
| 306 | {
|
|---|
| 307 | nInUse = 0;
|
|---|
| 308 | for (int i = 0; i < 256; i++) {
|
|---|
| 309 | if (inUse[i]) {
|
|---|
| 310 | seqToUnseq[nInUse] = (char)i;
|
|---|
| 311 | unseqToSeq[i] = (char)nInUse;
|
|---|
| 312 | nInUse++;
|
|---|
| 313 | }
|
|---|
| 314 | }
|
|---|
| 315 | }
|
|---|
| 316 |
|
|---|
| 317 | /// <summary>
|
|---|
| 318 | /// Get the number of bytes written to output.
|
|---|
| 319 | /// </summary>
|
|---|
| 320 | void WriteRun()
|
|---|
| 321 | {
|
|---|
| 322 | if (last < allowableBlockSize) {
|
|---|
| 323 | inUse[currentChar] = true;
|
|---|
| 324 | for (int i = 0; i < runLength; i++) {
|
|---|
| 325 | mCrc.Update(currentChar);
|
|---|
| 326 | }
|
|---|
| 327 |
|
|---|
| 328 | switch (runLength) {
|
|---|
| 329 | case 1:
|
|---|
| 330 | last++;
|
|---|
| 331 | block[last + 1] = (byte)currentChar;
|
|---|
| 332 | break;
|
|---|
| 333 | case 2:
|
|---|
| 334 | last++;
|
|---|
| 335 | block[last + 1] = (byte)currentChar;
|
|---|
| 336 | last++;
|
|---|
| 337 | block[last + 1] = (byte)currentChar;
|
|---|
| 338 | break;
|
|---|
| 339 | case 3:
|
|---|
| 340 | last++;
|
|---|
| 341 | block[last + 1] = (byte)currentChar;
|
|---|
| 342 | last++;
|
|---|
| 343 | block[last + 1] = (byte)currentChar;
|
|---|
| 344 | last++;
|
|---|
| 345 | block[last + 1] = (byte)currentChar;
|
|---|
| 346 | break;
|
|---|
| 347 | default:
|
|---|
| 348 | inUse[runLength - 4] = true;
|
|---|
| 349 | last++;
|
|---|
| 350 | block[last + 1] = (byte)currentChar;
|
|---|
| 351 | last++;
|
|---|
| 352 | block[last + 1] = (byte)currentChar;
|
|---|
| 353 | last++;
|
|---|
| 354 | block[last + 1] = (byte)currentChar;
|
|---|
| 355 | last++;
|
|---|
| 356 | block[last + 1] = (byte)currentChar;
|
|---|
| 357 | last++;
|
|---|
| 358 | block[last + 1] = (byte)(runLength - 4);
|
|---|
| 359 | break;
|
|---|
| 360 | }
|
|---|
| 361 | } else {
|
|---|
| 362 | EndBlock();
|
|---|
| 363 | InitBlock();
|
|---|
| 364 | WriteRun();
|
|---|
| 365 | }
|
|---|
| 366 | }
|
|---|
| 367 |
|
|---|
| 368 | /// <summary>
|
|---|
| 369 | /// Get the number of bytes written to the output.
|
|---|
| 370 | /// </summary>
|
|---|
| 371 | public int BytesWritten
|
|---|
| 372 | {
|
|---|
| 373 | get { return bytesOut; }
|
|---|
| 374 | }
|
|---|
| 375 |
|
|---|
| 376 | /// <summary>
|
|---|
| 377 | /// Releases the unmanaged resources used by the <see cref="BZip2OutputStream"/> and optionally releases the managed resources.
|
|---|
| 378 | /// </summary>
|
|---|
| 379 | /// <param name="disposing">true to release both managed and unmanaged resources; false to release only unmanaged resources.</param>
|
|---|
| 380 | #if NET_1_0 || NET_1_1 || NETCF_1_0
|
|---|
| 381 | protected virtual void Dispose(bool disposing)
|
|---|
| 382 | #else
|
|---|
| 383 | override protected void Dispose(bool disposing)
|
|---|
| 384 | #endif
|
|---|
| 385 | {
|
|---|
| 386 | try {
|
|---|
| 387 | #if !NET_1_0 && !NET_1_1 && !NETCF_1_0
|
|---|
| 388 | base.Dispose(disposing);
|
|---|
| 389 | #endif
|
|---|
| 390 | if( !disposed_ ) {
|
|---|
| 391 | disposed_=true;
|
|---|
| 392 |
|
|---|
| 393 | if( runLength>0 ) {
|
|---|
| 394 | WriteRun();
|
|---|
| 395 | }
|
|---|
| 396 |
|
|---|
| 397 | currentChar=-1;
|
|---|
| 398 | EndBlock();
|
|---|
| 399 | EndCompression();
|
|---|
| 400 | Flush();
|
|---|
| 401 | }
|
|---|
| 402 | }
|
|---|
| 403 | finally {
|
|---|
| 404 | if ( disposing ) {
|
|---|
| 405 | if ( IsStreamOwner ) {
|
|---|
| 406 | baseStream.Close();
|
|---|
| 407 | }
|
|---|
| 408 | }
|
|---|
| 409 | }
|
|---|
| 410 | }
|
|---|
| 411 |
|
|---|
| 412 | /// <summary>
|
|---|
| 413 | /// Flush output buffers
|
|---|
| 414 | /// </summary>
|
|---|
| 415 | public override void Flush()
|
|---|
| 416 | {
|
|---|
| 417 | baseStream.Flush();
|
|---|
| 418 | }
|
|---|
| 419 |
|
|---|
| 420 | void Initialize()
|
|---|
| 421 | {
|
|---|
| 422 | bytesOut = 0;
|
|---|
| 423 | nBlocksRandomised = 0;
|
|---|
| 424 |
|
|---|
| 425 | /*--- Write header `magic' bytes indicating file-format == huffmanised,
|
|---|
| 426 | followed by a digit indicating blockSize100k.
|
|---|
| 427 | ---*/
|
|---|
| 428 |
|
|---|
| 429 | BsPutUChar('B');
|
|---|
| 430 | BsPutUChar('Z');
|
|---|
| 431 |
|
|---|
| 432 | BsPutUChar('h');
|
|---|
| 433 | BsPutUChar('0' + blockSize100k);
|
|---|
| 434 |
|
|---|
| 435 | combinedCRC = 0;
|
|---|
| 436 | }
|
|---|
| 437 |
|
|---|
| 438 | void InitBlock()
|
|---|
| 439 | {
|
|---|
| 440 | mCrc.Reset();
|
|---|
| 441 | last = -1;
|
|---|
| 442 |
|
|---|
| 443 | for (int i = 0; i < 256; i++) {
|
|---|
| 444 | inUse[i] = false;
|
|---|
| 445 | }
|
|---|
| 446 |
|
|---|
| 447 | /*--- 20 is just a paranoia constant ---*/
|
|---|
| 448 | allowableBlockSize = BZip2Constants.BaseBlockSize * blockSize100k - 20;
|
|---|
| 449 | }
|
|---|
| 450 |
|
|---|
| 451 | void EndBlock()
|
|---|
| 452 | {
|
|---|
| 453 | if (last < 0) { // dont do anything for empty files, (makes empty files compatible with original Bzip)
|
|---|
| 454 | return;
|
|---|
| 455 | }
|
|---|
| 456 |
|
|---|
| 457 | blockCRC = unchecked((uint)mCrc.Value);
|
|---|
| 458 | combinedCRC = (combinedCRC << 1) | (combinedCRC >> 31);
|
|---|
| 459 | combinedCRC ^= blockCRC;
|
|---|
| 460 |
|
|---|
| 461 | /*-- sort the block and establish position of original string --*/
|
|---|
| 462 | DoReversibleTransformation();
|
|---|
| 463 |
|
|---|
| 464 | /*--
|
|---|
| 465 | A 6-byte block header, the value chosen arbitrarily
|
|---|
| 466 | as 0x314159265359 :-). A 32 bit value does not really
|
|---|
| 467 | give a strong enough guarantee that the value will not
|
|---|
| 468 | appear by chance in the compressed datastream. Worst-case
|
|---|
| 469 | probability of this event, for a 900k block, is about
|
|---|
| 470 | 2.0e-3 for 32 bits, 1.0e-5 for 40 bits and 4.0e-8 for 48 bits.
|
|---|
| 471 | For a compressed file of size 100Gb -- about 100000 blocks --
|
|---|
| 472 | only a 48-bit marker will do. NB: normal compression/
|
|---|
| 473 | decompression do *not* rely on these statistical properties.
|
|---|
| 474 | They are only important when trying to recover blocks from
|
|---|
| 475 | damaged files.
|
|---|
| 476 | --*/
|
|---|
| 477 | BsPutUChar(0x31);
|
|---|
| 478 | BsPutUChar(0x41);
|
|---|
| 479 | BsPutUChar(0x59);
|
|---|
| 480 | BsPutUChar(0x26);
|
|---|
| 481 | BsPutUChar(0x53);
|
|---|
| 482 | BsPutUChar(0x59);
|
|---|
| 483 |
|
|---|
| 484 | /*-- Now the block's CRC, so it is in a known place. --*/
|
|---|
| 485 | unchecked {
|
|---|
| 486 | BsPutint((int)blockCRC);
|
|---|
| 487 | }
|
|---|
| 488 |
|
|---|
| 489 | /*-- Now a single bit indicating randomisation. --*/
|
|---|
| 490 | if (blockRandomised) {
|
|---|
| 491 | BsW(1,1);
|
|---|
| 492 | nBlocksRandomised++;
|
|---|
| 493 | } else {
|
|---|
| 494 | BsW(1,0);
|
|---|
| 495 | }
|
|---|
| 496 |
|
|---|
| 497 | /*-- Finally, block's contents proper. --*/
|
|---|
| 498 | MoveToFrontCodeAndSend();
|
|---|
| 499 | }
|
|---|
| 500 |
|
|---|
| 501 | void EndCompression()
|
|---|
| 502 | {
|
|---|
| 503 | /*--
|
|---|
| 504 | Now another magic 48-bit number, 0x177245385090, to
|
|---|
| 505 | indicate the end of the last block. (sqrt(pi), if
|
|---|
| 506 | you want to know. I did want to use e, but it contains
|
|---|
| 507 | too much repetition -- 27 18 28 18 28 46 -- for me
|
|---|
| 508 | to feel statistically comfortable. Call me paranoid.)
|
|---|
| 509 | --*/
|
|---|
| 510 | BsPutUChar(0x17);
|
|---|
| 511 | BsPutUChar(0x72);
|
|---|
| 512 | BsPutUChar(0x45);
|
|---|
| 513 | BsPutUChar(0x38);
|
|---|
| 514 | BsPutUChar(0x50);
|
|---|
| 515 | BsPutUChar(0x90);
|
|---|
| 516 |
|
|---|
| 517 | unchecked {
|
|---|
| 518 | BsPutint((int)combinedCRC);
|
|---|
| 519 | }
|
|---|
| 520 |
|
|---|
| 521 | BsFinishedWithStream();
|
|---|
| 522 | }
|
|---|
| 523 |
|
|---|
| 524 | void BsSetStream(Stream stream)
|
|---|
| 525 | {
|
|---|
| 526 | baseStream = stream;
|
|---|
| 527 | bsLive = 0;
|
|---|
| 528 | bsBuff = 0;
|
|---|
| 529 | bytesOut = 0;
|
|---|
| 530 | }
|
|---|
| 531 |
|
|---|
| 532 | void BsFinishedWithStream()
|
|---|
| 533 | {
|
|---|
| 534 | while (bsLive > 0)
|
|---|
| 535 | {
|
|---|
| 536 | int ch = (bsBuff >> 24);
|
|---|
| 537 | baseStream.WriteByte((byte)ch); // write 8-bit
|
|---|
| 538 | bsBuff <<= 8;
|
|---|
| 539 | bsLive -= 8;
|
|---|
| 540 | bytesOut++;
|
|---|
| 541 | }
|
|---|
| 542 | }
|
|---|
| 543 |
|
|---|
| 544 | void BsW(int n, int v)
|
|---|
| 545 | {
|
|---|
| 546 | while (bsLive >= 8) {
|
|---|
| 547 | int ch = (bsBuff >> 24);
|
|---|
| 548 | unchecked{baseStream.WriteByte((byte)ch);} // write 8-bit
|
|---|
| 549 | bsBuff <<= 8;
|
|---|
| 550 | bsLive -= 8;
|
|---|
| 551 | ++bytesOut;
|
|---|
| 552 | }
|
|---|
| 553 | bsBuff |= (v << (32 - bsLive - n));
|
|---|
| 554 | bsLive += n;
|
|---|
| 555 | }
|
|---|
| 556 |
|
|---|
| 557 | void BsPutUChar(int c)
|
|---|
| 558 | {
|
|---|
| 559 | BsW(8, c);
|
|---|
| 560 | }
|
|---|
| 561 |
|
|---|
| 562 | void BsPutint(int u)
|
|---|
| 563 | {
|
|---|
| 564 | BsW(8, (u >> 24) & 0xFF);
|
|---|
| 565 | BsW(8, (u >> 16) & 0xFF);
|
|---|
| 566 | BsW(8, (u >> 8) & 0xFF);
|
|---|
| 567 | BsW(8, u & 0xFF);
|
|---|
| 568 | }
|
|---|
| 569 |
|
|---|
| 570 | void BsPutIntVS(int numBits, int c)
|
|---|
| 571 | {
|
|---|
| 572 | BsW(numBits, c);
|
|---|
| 573 | }
|
|---|
| 574 |
|
|---|
| 575 | void SendMTFValues()
|
|---|
| 576 | {
|
|---|
| 577 | char[][] len = new char[BZip2Constants.GroupCount][];
|
|---|
| 578 | for (int i = 0; i < BZip2Constants.GroupCount; ++i) {
|
|---|
| 579 | len[i] = new char[BZip2Constants.MaximumAlphaSize];
|
|---|
| 580 | }
|
|---|
| 581 |
|
|---|
| 582 | int gs, ge, totc, bt, bc, iter;
|
|---|
| 583 | int nSelectors = 0, alphaSize, minLen, maxLen, selCtr;
|
|---|
| 584 | int nGroups;
|
|---|
| 585 |
|
|---|
| 586 | alphaSize = nInUse + 2;
|
|---|
| 587 | for (int t = 0; t < BZip2Constants.GroupCount; t++) {
|
|---|
| 588 | for (int v = 0; v < alphaSize; v++) {
|
|---|
| 589 | len[t][v] = (char)GREATER_ICOST;
|
|---|
| 590 | }
|
|---|
| 591 | }
|
|---|
| 592 |
|
|---|
| 593 | /*--- Decide how many coding tables to use ---*/
|
|---|
| 594 | if (nMTF <= 0) {
|
|---|
| 595 | Panic();
|
|---|
| 596 | }
|
|---|
| 597 |
|
|---|
| 598 | if (nMTF < 200) {
|
|---|
| 599 | nGroups = 2;
|
|---|
| 600 | } else if (nMTF < 600) {
|
|---|
| 601 | nGroups = 3;
|
|---|
| 602 | } else if (nMTF < 1200) {
|
|---|
| 603 | nGroups = 4;
|
|---|
| 604 | } else if (nMTF < 2400) {
|
|---|
| 605 | nGroups = 5;
|
|---|
| 606 | } else {
|
|---|
| 607 | nGroups = 6;
|
|---|
| 608 | }
|
|---|
| 609 |
|
|---|
| 610 | /*--- Generate an initial set of coding tables ---*/
|
|---|
| 611 | int nPart = nGroups;
|
|---|
| 612 | int remF = nMTF;
|
|---|
| 613 | gs = 0;
|
|---|
| 614 | while (nPart > 0) {
|
|---|
| 615 | int tFreq = remF / nPart;
|
|---|
| 616 | int aFreq = 0;
|
|---|
| 617 | ge = gs - 1;
|
|---|
| 618 | while (aFreq < tFreq && ge < alphaSize - 1) {
|
|---|
| 619 | ge++;
|
|---|
| 620 | aFreq += mtfFreq[ge];
|
|---|
| 621 | }
|
|---|
| 622 |
|
|---|
| 623 | if (ge > gs && nPart != nGroups && nPart != 1 && ((nGroups - nPart) % 2 == 1)) {
|
|---|
| 624 | aFreq -= mtfFreq[ge];
|
|---|
| 625 | ge--;
|
|---|
| 626 | }
|
|---|
| 627 |
|
|---|
| 628 | for (int v = 0; v < alphaSize; v++) {
|
|---|
| 629 | if (v >= gs && v <= ge) {
|
|---|
| 630 | len[nPart - 1][v] = (char)LESSER_ICOST;
|
|---|
| 631 | } else {
|
|---|
| 632 | len[nPart - 1][v] = (char)GREATER_ICOST;
|
|---|
| 633 | }
|
|---|
| 634 | }
|
|---|
| 635 |
|
|---|
| 636 | nPart--;
|
|---|
| 637 | gs = ge + 1;
|
|---|
| 638 | remF -= aFreq;
|
|---|
| 639 | }
|
|---|
| 640 |
|
|---|
| 641 | int[][] rfreq = new int[BZip2Constants.GroupCount][];
|
|---|
| 642 | for (int i = 0; i < BZip2Constants.GroupCount; ++i) {
|
|---|
| 643 | rfreq[i] = new int[BZip2Constants.MaximumAlphaSize];
|
|---|
| 644 | }
|
|---|
| 645 |
|
|---|
| 646 | int[] fave = new int[BZip2Constants.GroupCount];
|
|---|
| 647 | short[] cost = new short[BZip2Constants.GroupCount];
|
|---|
| 648 | /*---
|
|---|
| 649 | Iterate up to N_ITERS times to improve the tables.
|
|---|
| 650 | ---*/
|
|---|
| 651 | for (iter = 0; iter < BZip2Constants.NumberOfIterations; ++iter) {
|
|---|
| 652 | for (int t = 0; t < nGroups; ++t) {
|
|---|
| 653 | fave[t] = 0;
|
|---|
| 654 | }
|
|---|
| 655 |
|
|---|
| 656 | for (int t = 0; t < nGroups; ++t) {
|
|---|
| 657 | for (int v = 0; v < alphaSize; ++v) {
|
|---|
| 658 | rfreq[t][v] = 0;
|
|---|
| 659 | }
|
|---|
| 660 | }
|
|---|
| 661 |
|
|---|
| 662 | nSelectors = 0;
|
|---|
| 663 | totc = 0;
|
|---|
| 664 | gs = 0;
|
|---|
| 665 | while (true) {
|
|---|
| 666 | /*--- Set group start & end marks. --*/
|
|---|
| 667 | if (gs >= nMTF) {
|
|---|
| 668 | break;
|
|---|
| 669 | }
|
|---|
| 670 | ge = gs + BZip2Constants.GroupSize - 1;
|
|---|
| 671 | if (ge >= nMTF) {
|
|---|
| 672 | ge = nMTF - 1;
|
|---|
| 673 | }
|
|---|
| 674 |
|
|---|
| 675 | /*--
|
|---|
| 676 | Calculate the cost of this group as coded
|
|---|
| 677 | by each of the coding tables.
|
|---|
| 678 | --*/
|
|---|
| 679 | for (int t = 0; t < nGroups; t++) {
|
|---|
| 680 | cost[t] = 0;
|
|---|
| 681 | }
|
|---|
| 682 |
|
|---|
| 683 | if (nGroups == 6) {
|
|---|
| 684 | short cost0, cost1, cost2, cost3, cost4, cost5;
|
|---|
| 685 | cost0 = cost1 = cost2 = cost3 = cost4 = cost5 = 0;
|
|---|
| 686 | for (int i = gs; i <= ge; ++i) {
|
|---|
| 687 | short icv = szptr[i];
|
|---|
| 688 | cost0 += (short)len[0][icv];
|
|---|
| 689 | cost1 += (short)len[1][icv];
|
|---|
| 690 | cost2 += (short)len[2][icv];
|
|---|
| 691 | cost3 += (short)len[3][icv];
|
|---|
| 692 | cost4 += (short)len[4][icv];
|
|---|
| 693 | cost5 += (short)len[5][icv];
|
|---|
| 694 | }
|
|---|
| 695 | cost[0] = cost0;
|
|---|
| 696 | cost[1] = cost1;
|
|---|
| 697 | cost[2] = cost2;
|
|---|
| 698 | cost[3] = cost3;
|
|---|
| 699 | cost[4] = cost4;
|
|---|
| 700 | cost[5] = cost5;
|
|---|
| 701 | } else {
|
|---|
| 702 | for (int i = gs; i <= ge; ++i) {
|
|---|
| 703 | short icv = szptr[i];
|
|---|
| 704 | for (int t = 0; t < nGroups; t++) {
|
|---|
| 705 | cost[t] += (short)len[t][icv];
|
|---|
| 706 | }
|
|---|
| 707 | }
|
|---|
| 708 | }
|
|---|
| 709 |
|
|---|
| 710 | /*--
|
|---|
| 711 | Find the coding table which is best for this group,
|
|---|
| 712 | and record its identity in the selector table.
|
|---|
| 713 | --*/
|
|---|
| 714 | bc = 999999999;
|
|---|
| 715 | bt = -1;
|
|---|
| 716 | for (int t = 0; t < nGroups; ++t) {
|
|---|
| 717 | if (cost[t] < bc) {
|
|---|
| 718 | bc = cost[t];
|
|---|
| 719 | bt = t;
|
|---|
| 720 | }
|
|---|
| 721 | }
|
|---|
| 722 | totc += bc;
|
|---|
| 723 | fave[bt]++;
|
|---|
| 724 | selector[nSelectors] = (char)bt;
|
|---|
| 725 | nSelectors++;
|
|---|
| 726 |
|
|---|
| 727 | /*--
|
|---|
| 728 | Increment the symbol frequencies for the selected table.
|
|---|
| 729 | --*/
|
|---|
| 730 | for (int i = gs; i <= ge; ++i) {
|
|---|
| 731 | ++rfreq[bt][szptr[i]];
|
|---|
| 732 | }
|
|---|
| 733 |
|
|---|
| 734 | gs = ge+1;
|
|---|
| 735 | }
|
|---|
| 736 |
|
|---|
| 737 | /*--
|
|---|
| 738 | Recompute the tables based on the accumulated frequencies.
|
|---|
| 739 | --*/
|
|---|
| 740 | for (int t = 0; t < nGroups; ++t) {
|
|---|
| 741 | HbMakeCodeLengths(len[t], rfreq[t], alphaSize, 20);
|
|---|
| 742 | }
|
|---|
| 743 | }
|
|---|
| 744 |
|
|---|
| 745 | rfreq = null;
|
|---|
| 746 | fave = null;
|
|---|
| 747 | cost = null;
|
|---|
| 748 |
|
|---|
| 749 | if (!(nGroups < 8)) {
|
|---|
| 750 | Panic();
|
|---|
| 751 | }
|
|---|
| 752 |
|
|---|
| 753 | if (!(nSelectors < 32768 && nSelectors <= (2 + (900000 / BZip2Constants.GroupSize)))) {
|
|---|
| 754 | Panic();
|
|---|
| 755 | }
|
|---|
| 756 |
|
|---|
| 757 | /*--- Compute MTF values for the selectors. ---*/
|
|---|
| 758 | char[] pos = new char[BZip2Constants.GroupCount];
|
|---|
| 759 | char ll_i, tmp2, tmp;
|
|---|
| 760 |
|
|---|
| 761 | for (int i = 0; i < nGroups; i++) {
|
|---|
| 762 | pos[i] = (char)i;
|
|---|
| 763 | }
|
|---|
| 764 |
|
|---|
| 765 | for (int i = 0; i < nSelectors; i++) {
|
|---|
| 766 | ll_i = selector[i];
|
|---|
| 767 | int j = 0;
|
|---|
| 768 | tmp = pos[j];
|
|---|
| 769 | while (ll_i != tmp) {
|
|---|
| 770 | j++;
|
|---|
| 771 | tmp2 = tmp;
|
|---|
| 772 | tmp = pos[j];
|
|---|
| 773 | pos[j] = tmp2;
|
|---|
| 774 | }
|
|---|
| 775 | pos[0] = tmp;
|
|---|
| 776 | selectorMtf[i] = (char)j;
|
|---|
| 777 | }
|
|---|
| 778 |
|
|---|
| 779 | int[][] code = new int[BZip2Constants.GroupCount][];
|
|---|
| 780 |
|
|---|
| 781 | for (int i = 0; i < BZip2Constants.GroupCount; ++i) {
|
|---|
| 782 | code[i] = new int[BZip2Constants.MaximumAlphaSize];
|
|---|
| 783 | }
|
|---|
| 784 |
|
|---|
| 785 | /*--- Assign actual codes for the tables. --*/
|
|---|
| 786 | for (int t = 0; t < nGroups; t++) {
|
|---|
| 787 | minLen = 32;
|
|---|
| 788 | maxLen = 0;
|
|---|
| 789 | for (int i = 0; i < alphaSize; i++) {
|
|---|
| 790 | if (len[t][i] > maxLen) {
|
|---|
| 791 | maxLen = len[t][i];
|
|---|
| 792 | }
|
|---|
| 793 | if (len[t][i] < minLen) {
|
|---|
| 794 | minLen = len[t][i];
|
|---|
| 795 | }
|
|---|
| 796 | }
|
|---|
| 797 | if (maxLen > 20) {
|
|---|
| 798 | Panic();
|
|---|
| 799 | }
|
|---|
| 800 | if (minLen < 1) {
|
|---|
| 801 | Panic();
|
|---|
| 802 | }
|
|---|
| 803 | HbAssignCodes(code[t], len[t], minLen, maxLen, alphaSize);
|
|---|
| 804 | }
|
|---|
| 805 |
|
|---|
| 806 | /*--- Transmit the mapping table. ---*/
|
|---|
| 807 | bool[] inUse16 = new bool[16];
|
|---|
| 808 | for (int i = 0; i < 16; ++i) {
|
|---|
| 809 | inUse16[i] = false;
|
|---|
| 810 | for (int j = 0; j < 16; ++j) {
|
|---|
| 811 | if (inUse[i * 16 + j]) {
|
|---|
| 812 | inUse16[i] = true;
|
|---|
| 813 | }
|
|---|
| 814 | }
|
|---|
| 815 | }
|
|---|
| 816 |
|
|---|
| 817 | for (int i = 0; i < 16; ++i) {
|
|---|
| 818 | if (inUse16[i]) {
|
|---|
| 819 | BsW(1,1);
|
|---|
| 820 | } else {
|
|---|
| 821 | BsW(1,0);
|
|---|
| 822 | }
|
|---|
| 823 | }
|
|---|
| 824 |
|
|---|
| 825 | for (int i = 0; i < 16; ++i) {
|
|---|
| 826 | if (inUse16[i]) {
|
|---|
| 827 | for (int j = 0; j < 16; ++j) {
|
|---|
| 828 | if (inUse[i * 16 + j]) {
|
|---|
| 829 | BsW(1,1);
|
|---|
| 830 | } else {
|
|---|
| 831 | BsW(1,0);
|
|---|
| 832 | }
|
|---|
| 833 | }
|
|---|
| 834 | }
|
|---|
| 835 | }
|
|---|
| 836 |
|
|---|
| 837 | /*--- Now the selectors. ---*/
|
|---|
| 838 | BsW(3, nGroups);
|
|---|
| 839 | BsW(15, nSelectors);
|
|---|
| 840 | for (int i = 0; i < nSelectors; ++i) {
|
|---|
| 841 | for (int j = 0; j < selectorMtf[i]; ++j) {
|
|---|
| 842 | BsW(1,1);
|
|---|
| 843 | }
|
|---|
| 844 | BsW(1,0);
|
|---|
| 845 | }
|
|---|
| 846 |
|
|---|
| 847 | /*--- Now the coding tables. ---*/
|
|---|
| 848 | for (int t = 0; t < nGroups; ++t) {
|
|---|
| 849 | int curr = len[t][0];
|
|---|
| 850 | BsW(5, curr);
|
|---|
| 851 | for (int i = 0; i < alphaSize; ++i) {
|
|---|
| 852 | while (curr < len[t][i]) {
|
|---|
| 853 | BsW(2, 2);
|
|---|
| 854 | curr++; /* 10 */
|
|---|
| 855 | }
|
|---|
| 856 | while (curr > len[t][i]) {
|
|---|
| 857 | BsW(2, 3);
|
|---|
| 858 | curr--; /* 11 */
|
|---|
| 859 | }
|
|---|
| 860 | BsW (1, 0);
|
|---|
| 861 | }
|
|---|
| 862 | }
|
|---|
| 863 |
|
|---|
| 864 | /*--- And finally, the block data proper ---*/
|
|---|
| 865 | selCtr = 0;
|
|---|
| 866 | gs = 0;
|
|---|
| 867 | while (true) {
|
|---|
| 868 | if (gs >= nMTF) {
|
|---|
| 869 | break;
|
|---|
| 870 | }
|
|---|
| 871 | ge = gs + BZip2Constants.GroupSize - 1;
|
|---|
| 872 | if (ge >= nMTF) {
|
|---|
| 873 | ge = nMTF - 1;
|
|---|
| 874 | }
|
|---|
| 875 |
|
|---|
| 876 | for (int i = gs; i <= ge; i++) {
|
|---|
| 877 | BsW(len[selector[selCtr]][szptr[i]], code[selector[selCtr]][szptr[i]]);
|
|---|
| 878 | }
|
|---|
| 879 |
|
|---|
| 880 | gs = ge + 1;
|
|---|
| 881 | ++selCtr;
|
|---|
| 882 | }
|
|---|
| 883 | if (!(selCtr == nSelectors)) {
|
|---|
| 884 | Panic();
|
|---|
| 885 | }
|
|---|
| 886 | }
|
|---|
| 887 |
|
|---|
| 888 | void MoveToFrontCodeAndSend ()
|
|---|
| 889 | {
|
|---|
| 890 | BsPutIntVS(24, origPtr);
|
|---|
| 891 | GenerateMTFValues();
|
|---|
| 892 | SendMTFValues();
|
|---|
| 893 | }
|
|---|
| 894 |
|
|---|
| 895 | void SimpleSort(int lo, int hi, int d)
|
|---|
| 896 | {
|
|---|
| 897 | int i, j, h, bigN, hp;
|
|---|
| 898 | int v;
|
|---|
| 899 |
|
|---|
| 900 | bigN = hi - lo + 1;
|
|---|
| 901 | if (bigN < 2) {
|
|---|
| 902 | return;
|
|---|
| 903 | }
|
|---|
| 904 |
|
|---|
| 905 | hp = 0;
|
|---|
| 906 | while (increments[hp] < bigN) {
|
|---|
| 907 | hp++;
|
|---|
| 908 | }
|
|---|
| 909 | hp--;
|
|---|
| 910 |
|
|---|
| 911 | for (; hp >= 0; hp--) {
|
|---|
| 912 | h = increments[hp];
|
|---|
| 913 |
|
|---|
| 914 | i = lo + h;
|
|---|
| 915 | while (true) {
|
|---|
| 916 | /*-- copy 1 --*/
|
|---|
| 917 | if (i > hi)
|
|---|
| 918 | break;
|
|---|
| 919 | v = zptr[i];
|
|---|
| 920 | j = i;
|
|---|
| 921 | while (FullGtU(zptr[j-h]+d, v+d)) {
|
|---|
| 922 | zptr[j] = zptr[j-h];
|
|---|
| 923 | j = j - h;
|
|---|
| 924 | if (j <= (lo + h - 1))
|
|---|
| 925 | break;
|
|---|
| 926 | }
|
|---|
| 927 | zptr[j] = v;
|
|---|
| 928 | i++;
|
|---|
| 929 |
|
|---|
| 930 | /*-- copy 2 --*/
|
|---|
| 931 | if (i > hi) {
|
|---|
| 932 | break;
|
|---|
| 933 | }
|
|---|
| 934 | v = zptr[i];
|
|---|
| 935 | j = i;
|
|---|
| 936 | while (FullGtU ( zptr[j-h]+d, v+d )) {
|
|---|
| 937 | zptr[j] = zptr[j-h];
|
|---|
| 938 | j = j - h;
|
|---|
| 939 | if (j <= (lo + h - 1)) {
|
|---|
| 940 | break;
|
|---|
| 941 | }
|
|---|
| 942 | }
|
|---|
| 943 | zptr[j] = v;
|
|---|
| 944 | i++;
|
|---|
| 945 |
|
|---|
| 946 | /*-- copy 3 --*/
|
|---|
| 947 | if (i > hi) {
|
|---|
| 948 | break;
|
|---|
| 949 | }
|
|---|
| 950 | v = zptr[i];
|
|---|
| 951 | j = i;
|
|---|
| 952 | while (FullGtU ( zptr[j-h]+d, v+d)) {
|
|---|
| 953 | zptr[j] = zptr[j-h];
|
|---|
| 954 | j = j - h;
|
|---|
| 955 | if (j <= (lo + h - 1)) {
|
|---|
| 956 | break;
|
|---|
| 957 | }
|
|---|
| 958 | }
|
|---|
| 959 | zptr[j] = v;
|
|---|
| 960 | i++;
|
|---|
| 961 |
|
|---|
| 962 | if (workDone > workLimit && firstAttempt) {
|
|---|
| 963 | return;
|
|---|
| 964 | }
|
|---|
| 965 | }
|
|---|
| 966 | }
|
|---|
| 967 | }
|
|---|
| 968 |
|
|---|
| 969 | void Vswap(int p1, int p2, int n )
|
|---|
| 970 | {
|
|---|
| 971 | int temp = 0;
|
|---|
| 972 | while (n > 0) {
|
|---|
| 973 | temp = zptr[p1];
|
|---|
| 974 | zptr[p1] = zptr[p2];
|
|---|
| 975 | zptr[p2] = temp;
|
|---|
| 976 | p1++;
|
|---|
| 977 | p2++;
|
|---|
| 978 | n--;
|
|---|
| 979 | }
|
|---|
| 980 | }
|
|---|
| 981 |
|
|---|
| 982 | void QSort3(int loSt, int hiSt, int dSt)
|
|---|
| 983 | {
|
|---|
| 984 | int unLo, unHi, ltLo, gtHi, med, n, m;
|
|---|
| 985 | int lo, hi, d;
|
|---|
| 986 |
|
|---|
| 987 | StackElement[] stack = new StackElement[QSORT_STACK_SIZE];
|
|---|
| 988 |
|
|---|
| 989 | int sp = 0;
|
|---|
| 990 |
|
|---|
| 991 | stack[sp].ll = loSt;
|
|---|
| 992 | stack[sp].hh = hiSt;
|
|---|
| 993 | stack[sp].dd = dSt;
|
|---|
| 994 | sp++;
|
|---|
| 995 |
|
|---|
| 996 | while (sp > 0) {
|
|---|
| 997 | if (sp >= QSORT_STACK_SIZE) {
|
|---|
| 998 | Panic();
|
|---|
| 999 | }
|
|---|
| 1000 |
|
|---|
| 1001 | sp--;
|
|---|
| 1002 | lo = stack[sp].ll;
|
|---|
| 1003 | hi = stack[sp].hh;
|
|---|
| 1004 | d = stack[sp].dd;
|
|---|
| 1005 |
|
|---|
| 1006 | if (hi - lo < SMALL_THRESH || d > DEPTH_THRESH) {
|
|---|
| 1007 | SimpleSort(lo, hi, d);
|
|---|
| 1008 | if (workDone > workLimit && firstAttempt) {
|
|---|
| 1009 | return;
|
|---|
| 1010 | }
|
|---|
| 1011 | continue;
|
|---|
| 1012 | }
|
|---|
| 1013 |
|
|---|
| 1014 | med = Med3(block[zptr[lo] + d + 1],
|
|---|
| 1015 | block[zptr[hi ] + d + 1],
|
|---|
| 1016 | block[zptr[(lo + hi) >> 1] + d + 1]);
|
|---|
| 1017 |
|
|---|
| 1018 | unLo = ltLo = lo;
|
|---|
| 1019 | unHi = gtHi = hi;
|
|---|
| 1020 |
|
|---|
| 1021 | while (true) {
|
|---|
| 1022 | while (true) {
|
|---|
| 1023 | if (unLo > unHi) {
|
|---|
| 1024 | break;
|
|---|
| 1025 | }
|
|---|
| 1026 | n = ((int)block[zptr[unLo]+d + 1]) - med;
|
|---|
| 1027 | if (n == 0) {
|
|---|
| 1028 | int temp = zptr[unLo];
|
|---|
| 1029 | zptr[unLo] = zptr[ltLo];
|
|---|
| 1030 | zptr[ltLo] = temp;
|
|---|
| 1031 | ltLo++;
|
|---|
| 1032 | unLo++;
|
|---|
| 1033 | continue;
|
|---|
| 1034 | }
|
|---|
| 1035 | if (n > 0) {
|
|---|
| 1036 | break;
|
|---|
| 1037 | }
|
|---|
| 1038 | unLo++;
|
|---|
| 1039 | }
|
|---|
| 1040 |
|
|---|
| 1041 | while (true) {
|
|---|
| 1042 | if (unLo > unHi) {
|
|---|
| 1043 | break;
|
|---|
| 1044 | }
|
|---|
| 1045 | n = ((int)block[zptr[unHi]+d + 1]) - med;
|
|---|
| 1046 | if (n == 0) {
|
|---|
| 1047 | int temp = zptr[unHi];
|
|---|
| 1048 | zptr[unHi] = zptr[gtHi];
|
|---|
| 1049 | zptr[gtHi] = temp;
|
|---|
| 1050 | gtHi--;
|
|---|
| 1051 | unHi--;
|
|---|
| 1052 | continue;
|
|---|
| 1053 | }
|
|---|
| 1054 | if (n < 0) {
|
|---|
| 1055 | break;
|
|---|
| 1056 | }
|
|---|
| 1057 | unHi--;
|
|---|
| 1058 | }
|
|---|
| 1059 |
|
|---|
| 1060 | if (unLo > unHi) {
|
|---|
| 1061 | break;
|
|---|
| 1062 | }
|
|---|
| 1063 |
|
|---|
| 1064 | {
|
|---|
| 1065 | int temp = zptr[unLo];
|
|---|
| 1066 | zptr[unLo] = zptr[unHi];
|
|---|
| 1067 | zptr[unHi] = temp;
|
|---|
| 1068 | unLo++;
|
|---|
| 1069 | unHi--;
|
|---|
| 1070 | }
|
|---|
| 1071 | }
|
|---|
| 1072 |
|
|---|
| 1073 | if (gtHi < ltLo) {
|
|---|
| 1074 | stack[sp].ll = lo;
|
|---|
| 1075 | stack[sp].hh = hi;
|
|---|
| 1076 | stack[sp].dd = d+1;
|
|---|
| 1077 | sp++;
|
|---|
| 1078 | continue;
|
|---|
| 1079 | }
|
|---|
| 1080 |
|
|---|
| 1081 | n = ((ltLo-lo) < (unLo-ltLo)) ? (ltLo-lo) : (unLo-ltLo);
|
|---|
| 1082 | Vswap(lo, unLo-n, n);
|
|---|
| 1083 | m = ((hi-gtHi) < (gtHi-unHi)) ? (hi-gtHi) : (gtHi-unHi);
|
|---|
| 1084 | Vswap(unLo, hi-m+1, m);
|
|---|
| 1085 |
|
|---|
| 1086 | n = lo + unLo - ltLo - 1;
|
|---|
| 1087 | m = hi - (gtHi - unHi) + 1;
|
|---|
| 1088 |
|
|---|
| 1089 | stack[sp].ll = lo;
|
|---|
| 1090 | stack[sp].hh = n;
|
|---|
| 1091 | stack[sp].dd = d;
|
|---|
| 1092 | sp++;
|
|---|
| 1093 |
|
|---|
| 1094 | stack[sp].ll = n + 1;
|
|---|
| 1095 | stack[sp].hh = m - 1;
|
|---|
| 1096 | stack[sp].dd = d+1;
|
|---|
| 1097 | sp++;
|
|---|
| 1098 |
|
|---|
| 1099 | stack[sp].ll = m;
|
|---|
| 1100 | stack[sp].hh = hi;
|
|---|
| 1101 | stack[sp].dd = d;
|
|---|
| 1102 | sp++;
|
|---|
| 1103 | }
|
|---|
| 1104 | }
|
|---|
| 1105 |
|
|---|
| 1106 | void MainSort()
|
|---|
| 1107 | {
|
|---|
| 1108 | int i, j, ss, sb;
|
|---|
| 1109 | int[] runningOrder = new int[256];
|
|---|
| 1110 | int[] copy = new int[256];
|
|---|
| 1111 | bool[] bigDone = new bool[256];
|
|---|
| 1112 | int c1, c2;
|
|---|
| 1113 | int numQSorted;
|
|---|
| 1114 |
|
|---|
| 1115 | /*--
|
|---|
| 1116 | In the various block-sized structures, live data runs
|
|---|
| 1117 | from 0 to last+NUM_OVERSHOOT_BYTES inclusive. First,
|
|---|
| 1118 | set up the overshoot area for block.
|
|---|
| 1119 | --*/
|
|---|
| 1120 |
|
|---|
| 1121 | // if (verbosity >= 4) fprintf ( stderr, " sort initialise ...\n" );
|
|---|
| 1122 | for (i = 0; i < BZip2Constants.OvershootBytes; i++) {
|
|---|
| 1123 | block[last + i + 2] = block[(i % (last + 1)) + 1];
|
|---|
| 1124 | }
|
|---|
| 1125 | for (i = 0; i <= last + BZip2Constants.OvershootBytes; i++) {
|
|---|
| 1126 | quadrant[i] = 0;
|
|---|
| 1127 | }
|
|---|
| 1128 |
|
|---|
| 1129 | block[0] = (byte)(block[last + 1]);
|
|---|
| 1130 |
|
|---|
| 1131 | if (last < 4000) {
|
|---|
| 1132 | /*--
|
|---|
| 1133 | Use simpleSort(), since the full sorting mechanism
|
|---|
| 1134 | has quite a large constant overhead.
|
|---|
| 1135 | --*/
|
|---|
| 1136 | for (i = 0; i <= last; i++) {
|
|---|
| 1137 | zptr[i] = i;
|
|---|
| 1138 | }
|
|---|
| 1139 | firstAttempt = false;
|
|---|
| 1140 | workDone = workLimit = 0;
|
|---|
| 1141 | SimpleSort(0, last, 0);
|
|---|
| 1142 | } else {
|
|---|
| 1143 | numQSorted = 0;
|
|---|
| 1144 | for (i = 0; i <= 255; i++) {
|
|---|
| 1145 | bigDone[i] = false;
|
|---|
| 1146 | }
|
|---|
| 1147 | for (i = 0; i <= 65536; i++) {
|
|---|
| 1148 | ftab[i] = 0;
|
|---|
| 1149 | }
|
|---|
| 1150 |
|
|---|
| 1151 | c1 = block[0];
|
|---|
| 1152 | for (i = 0; i <= last; i++) {
|
|---|
| 1153 | c2 = block[i + 1];
|
|---|
| 1154 | ftab[(c1 << 8) + c2]++;
|
|---|
| 1155 | c1 = c2;
|
|---|
| 1156 | }
|
|---|
| 1157 |
|
|---|
| 1158 | for (i = 1; i <= 65536; i++) {
|
|---|
| 1159 | ftab[i] += ftab[i - 1];
|
|---|
| 1160 | }
|
|---|
| 1161 |
|
|---|
| 1162 | c1 = block[1];
|
|---|
| 1163 | for (i = 0; i < last; i++) {
|
|---|
| 1164 | c2 = block[i + 2];
|
|---|
| 1165 | j = (c1 << 8) + c2;
|
|---|
| 1166 | c1 = c2;
|
|---|
| 1167 | ftab[j]--;
|
|---|
| 1168 | zptr[ftab[j]] = i;
|
|---|
| 1169 | }
|
|---|
| 1170 |
|
|---|
| 1171 | j = ((block[last + 1]) << 8) + (block[1]);
|
|---|
| 1172 | ftab[j]--;
|
|---|
| 1173 | zptr[ftab[j]] = last;
|
|---|
| 1174 |
|
|---|
| 1175 | /*--
|
|---|
| 1176 | Now ftab contains the first loc of every small bucket.
|
|---|
| 1177 | Calculate the running order, from smallest to largest
|
|---|
| 1178 | big bucket.
|
|---|
| 1179 | --*/
|
|---|
| 1180 |
|
|---|
| 1181 | for (i = 0; i <= 255; i++) {
|
|---|
| 1182 | runningOrder[i] = i;
|
|---|
| 1183 | }
|
|---|
| 1184 |
|
|---|
| 1185 | int vv;
|
|---|
| 1186 | int h = 1;
|
|---|
| 1187 | do {
|
|---|
| 1188 | h = 3 * h + 1;
|
|---|
| 1189 | } while (h <= 256);
|
|---|
| 1190 | do {
|
|---|
| 1191 | h = h / 3;
|
|---|
| 1192 | for (i = h; i <= 255; i++) {
|
|---|
| 1193 | vv = runningOrder[i];
|
|---|
| 1194 | j = i;
|
|---|
| 1195 | while ((ftab[((runningOrder[j-h])+1) << 8] - ftab[(runningOrder[j-h]) << 8]) > (ftab[((vv)+1) << 8] - ftab[(vv) << 8])) {
|
|---|
| 1196 | runningOrder[j] = runningOrder[j-h];
|
|---|
| 1197 | j = j - h;
|
|---|
| 1198 | if (j <= (h - 1)) {
|
|---|
| 1199 | break;
|
|---|
| 1200 | }
|
|---|
| 1201 | }
|
|---|
| 1202 | runningOrder[j] = vv;
|
|---|
| 1203 | }
|
|---|
| 1204 | } while (h != 1);
|
|---|
| 1205 |
|
|---|
| 1206 | /*--
|
|---|
| 1207 | The main sorting loop.
|
|---|
| 1208 | --*/
|
|---|
| 1209 | for (i = 0; i <= 255; i++) {
|
|---|
| 1210 |
|
|---|
| 1211 | /*--
|
|---|
| 1212 | Process big buckets, starting with the least full.
|
|---|
| 1213 | --*/
|
|---|
| 1214 | ss = runningOrder[i];
|
|---|
| 1215 |
|
|---|
| 1216 | /*--
|
|---|
| 1217 | Complete the big bucket [ss] by quicksorting
|
|---|
| 1218 | any unsorted small buckets [ss, j]. Hopefully
|
|---|
| 1219 | previous pointer-scanning phases have already
|
|---|
| 1220 | completed many of the small buckets [ss, j], so
|
|---|
| 1221 | we don't have to sort them at all.
|
|---|
| 1222 | --*/
|
|---|
| 1223 | for (j = 0; j <= 255; j++) {
|
|---|
| 1224 | sb = (ss << 8) + j;
|
|---|
| 1225 | if(!((ftab[sb] & SETMASK) == SETMASK)) {
|
|---|
| 1226 | int lo = ftab[sb] & CLEARMASK;
|
|---|
| 1227 | int hi = (ftab[sb+1] & CLEARMASK) - 1;
|
|---|
| 1228 | if (hi > lo) {
|
|---|
| 1229 | QSort3(lo, hi, 2);
|
|---|
| 1230 | numQSorted += (hi - lo + 1);
|
|---|
| 1231 | if (workDone > workLimit && firstAttempt) {
|
|---|
| 1232 | return;
|
|---|
| 1233 | }
|
|---|
| 1234 | }
|
|---|
| 1235 | ftab[sb] |= SETMASK;
|
|---|
| 1236 | }
|
|---|
| 1237 | }
|
|---|
| 1238 |
|
|---|
| 1239 | /*--
|
|---|
| 1240 | The ss big bucket is now done. Record this fact,
|
|---|
| 1241 | and update the quadrant descriptors. Remember to
|
|---|
| 1242 | update quadrants in the overshoot area too, if
|
|---|
| 1243 | necessary. The "if (i < 255)" test merely skips
|
|---|
| 1244 | this updating for the last bucket processed, since
|
|---|
| 1245 | updating for the last bucket is pointless.
|
|---|
| 1246 | --*/
|
|---|
| 1247 | bigDone[ss] = true;
|
|---|
| 1248 |
|
|---|
| 1249 | if (i < 255) {
|
|---|
| 1250 | int bbStart = ftab[ss << 8] & CLEARMASK;
|
|---|
| 1251 | int bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart;
|
|---|
| 1252 | int shifts = 0;
|
|---|
| 1253 |
|
|---|
| 1254 | while ((bbSize >> shifts) > 65534) {
|
|---|
| 1255 | shifts++;
|
|---|
| 1256 | }
|
|---|
| 1257 |
|
|---|
| 1258 | for (j = 0; j < bbSize; j++) {
|
|---|
| 1259 | int a2update = zptr[bbStart + j];
|
|---|
| 1260 | int qVal = (j >> shifts);
|
|---|
| 1261 | quadrant[a2update] = qVal;
|
|---|
| 1262 | if (a2update < BZip2Constants.OvershootBytes) {
|
|---|
| 1263 | quadrant[a2update + last + 1] = qVal;
|
|---|
| 1264 | }
|
|---|
| 1265 | }
|
|---|
| 1266 |
|
|---|
| 1267 | if (!(((bbSize-1) >> shifts) <= 65535)) {
|
|---|
| 1268 | Panic();
|
|---|
| 1269 | }
|
|---|
| 1270 | }
|
|---|
| 1271 |
|
|---|
| 1272 | /*--
|
|---|
| 1273 | Now scan this big bucket so as to synthesise the
|
|---|
| 1274 | sorted order for small buckets [t, ss] for all t != ss.
|
|---|
| 1275 | --*/
|
|---|
| 1276 | for (j = 0; j <= 255; j++) {
|
|---|
| 1277 | copy[j] = ftab[(j << 8) + ss] & CLEARMASK;
|
|---|
| 1278 | }
|
|---|
| 1279 |
|
|---|
| 1280 | for (j = ftab[ss << 8] & CLEARMASK; j < (ftab[(ss+1) << 8] & CLEARMASK); j++) {
|
|---|
| 1281 | c1 = block[zptr[j]];
|
|---|
| 1282 | if (!bigDone[c1]) {
|
|---|
| 1283 | zptr[copy[c1]] = zptr[j] == 0 ? last : zptr[j] - 1;
|
|---|
| 1284 | copy[c1] ++;
|
|---|
| 1285 | }
|
|---|
| 1286 | }
|
|---|
| 1287 |
|
|---|
| 1288 | for (j = 0; j <= 255; j++) {
|
|---|
| 1289 | ftab[(j << 8) + ss] |= SETMASK;
|
|---|
| 1290 | }
|
|---|
| 1291 | }
|
|---|
| 1292 | }
|
|---|
| 1293 | }
|
|---|
| 1294 |
|
|---|
| 1295 | void RandomiseBlock()
|
|---|
| 1296 | {
|
|---|
| 1297 | int i;
|
|---|
| 1298 | int rNToGo = 0;
|
|---|
| 1299 | int rTPos = 0;
|
|---|
| 1300 | for (i = 0; i < 256; i++) {
|
|---|
| 1301 | inUse[i] = false;
|
|---|
| 1302 | }
|
|---|
| 1303 |
|
|---|
| 1304 | for (i = 0; i <= last; i++) {
|
|---|
| 1305 | if (rNToGo == 0) {
|
|---|
| 1306 | rNToGo = (int)BZip2Constants.RandomNumbers[rTPos];
|
|---|
| 1307 | rTPos++;
|
|---|
| 1308 | if (rTPos == 512) {
|
|---|
| 1309 | rTPos = 0;
|
|---|
| 1310 | }
|
|---|
| 1311 | }
|
|---|
| 1312 | rNToGo--;
|
|---|
| 1313 | block[i + 1] ^= (byte)((rNToGo == 1) ? 1 : 0);
|
|---|
| 1314 | // handle 16 bit signed numbers
|
|---|
| 1315 | block[i + 1] &= 0xFF;
|
|---|
| 1316 |
|
|---|
| 1317 | inUse[block[i + 1]] = true;
|
|---|
| 1318 | }
|
|---|
| 1319 | }
|
|---|
| 1320 |
|
|---|
| 1321 | void DoReversibleTransformation()
|
|---|
| 1322 | {
|
|---|
| 1323 | workLimit = workFactor * last;
|
|---|
| 1324 | workDone = 0;
|
|---|
| 1325 | blockRandomised = false;
|
|---|
| 1326 | firstAttempt = true;
|
|---|
| 1327 |
|
|---|
| 1328 | MainSort();
|
|---|
| 1329 |
|
|---|
| 1330 | if (workDone > workLimit && firstAttempt) {
|
|---|
| 1331 | RandomiseBlock();
|
|---|
| 1332 | workLimit = workDone = 0;
|
|---|
| 1333 | blockRandomised = true;
|
|---|
| 1334 | firstAttempt = false;
|
|---|
| 1335 | MainSort();
|
|---|
| 1336 | }
|
|---|
| 1337 |
|
|---|
| 1338 | origPtr = -1;
|
|---|
| 1339 | for (int i = 0; i <= last; i++) {
|
|---|
| 1340 | if (zptr[i] == 0) {
|
|---|
| 1341 | origPtr = i;
|
|---|
| 1342 | break;
|
|---|
| 1343 | }
|
|---|
| 1344 | }
|
|---|
| 1345 |
|
|---|
| 1346 | if (origPtr == -1) {
|
|---|
| 1347 | Panic();
|
|---|
| 1348 | }
|
|---|
| 1349 | }
|
|---|
| 1350 |
|
|---|
| 1351 | bool FullGtU(int i1, int i2)
|
|---|
| 1352 | {
|
|---|
| 1353 | int k;
|
|---|
| 1354 | byte c1, c2;
|
|---|
| 1355 | int s1, s2;
|
|---|
| 1356 |
|
|---|
| 1357 | c1 = block[i1 + 1];
|
|---|
| 1358 | c2 = block[i2 + 1];
|
|---|
| 1359 | if (c1 != c2) {
|
|---|
| 1360 | return c1 > c2;
|
|---|
| 1361 | }
|
|---|
| 1362 | i1++;
|
|---|
| 1363 | i2++;
|
|---|
| 1364 |
|
|---|
| 1365 | c1 = block[i1 + 1];
|
|---|
| 1366 | c2 = block[i2 + 1];
|
|---|
| 1367 | if (c1 != c2) {
|
|---|
| 1368 | return c1 > c2;
|
|---|
| 1369 | }
|
|---|
| 1370 | i1++;
|
|---|
| 1371 | i2++;
|
|---|
| 1372 |
|
|---|
| 1373 | c1 = block[i1 + 1];
|
|---|
| 1374 | c2 = block[i2 + 1];
|
|---|
| 1375 | if (c1 != c2) {
|
|---|
| 1376 | return c1 > c2;
|
|---|
| 1377 | }
|
|---|
| 1378 | i1++;
|
|---|
| 1379 | i2++;
|
|---|
| 1380 |
|
|---|
| 1381 | c1 = block[i1 + 1];
|
|---|
| 1382 | c2 = block[i2 + 1];
|
|---|
| 1383 | if (c1 != c2) {
|
|---|
| 1384 | return c1 > c2;
|
|---|
| 1385 | }
|
|---|
| 1386 | i1++;
|
|---|
| 1387 | i2++;
|
|---|
| 1388 |
|
|---|
| 1389 | c1 = block[i1 + 1];
|
|---|
| 1390 | c2 = block[i2 + 1];
|
|---|
| 1391 | if (c1 != c2) {
|
|---|
| 1392 | return c1 > c2;
|
|---|
| 1393 | }
|
|---|
| 1394 | i1++;
|
|---|
| 1395 | i2++;
|
|---|
| 1396 |
|
|---|
| 1397 | c1 = block[i1 + 1];
|
|---|
| 1398 | c2 = block[i2 + 1];
|
|---|
| 1399 | if (c1 != c2) {
|
|---|
| 1400 | return c1 > c2;
|
|---|
| 1401 | }
|
|---|
| 1402 | i1++;
|
|---|
| 1403 | i2++;
|
|---|
| 1404 |
|
|---|
| 1405 | k = last + 1;
|
|---|
| 1406 |
|
|---|
| 1407 | do {
|
|---|
| 1408 | c1 = block[i1 + 1];
|
|---|
| 1409 | c2 = block[i2 + 1];
|
|---|
| 1410 | if (c1 != c2) {
|
|---|
| 1411 | return c1 > c2;
|
|---|
| 1412 | }
|
|---|
| 1413 | s1 = quadrant[i1];
|
|---|
| 1414 | s2 = quadrant[i2];
|
|---|
| 1415 | if (s1 != s2) {
|
|---|
| 1416 | return s1 > s2;
|
|---|
| 1417 | }
|
|---|
| 1418 | i1++;
|
|---|
| 1419 | i2++;
|
|---|
| 1420 |
|
|---|
| 1421 | c1 = block[i1 + 1];
|
|---|
| 1422 | c2 = block[i2 + 1];
|
|---|
| 1423 | if (c1 != c2) {
|
|---|
| 1424 | return c1 > c2;
|
|---|
| 1425 | }
|
|---|
| 1426 | s1 = quadrant[i1];
|
|---|
| 1427 | s2 = quadrant[i2];
|
|---|
| 1428 | if (s1 != s2) {
|
|---|
| 1429 | return s1 > s2;
|
|---|
| 1430 | }
|
|---|
| 1431 | i1++;
|
|---|
| 1432 | i2++;
|
|---|
| 1433 |
|
|---|
| 1434 | c1 = block[i1 + 1];
|
|---|
| 1435 | c2 = block[i2 + 1];
|
|---|
| 1436 | if (c1 != c2) {
|
|---|
| 1437 | return c1 > c2;
|
|---|
| 1438 | }
|
|---|
| 1439 | s1 = quadrant[i1];
|
|---|
| 1440 | s2 = quadrant[i2];
|
|---|
| 1441 | if (s1 != s2) {
|
|---|
| 1442 | return s1 > s2;
|
|---|
| 1443 | }
|
|---|
| 1444 | i1++;
|
|---|
| 1445 | i2++;
|
|---|
| 1446 |
|
|---|
| 1447 | c1 = block[i1 + 1];
|
|---|
| 1448 | c2 = block[i2 + 1];
|
|---|
| 1449 | if (c1 != c2) {
|
|---|
| 1450 | return c1 > c2;
|
|---|
| 1451 | }
|
|---|
| 1452 | s1 = quadrant[i1];
|
|---|
| 1453 | s2 = quadrant[i2];
|
|---|
| 1454 | if (s1 != s2) {
|
|---|
| 1455 | return s1 > s2;
|
|---|
| 1456 | }
|
|---|
| 1457 | i1++;
|
|---|
| 1458 | i2++;
|
|---|
| 1459 |
|
|---|
| 1460 | if (i1 > last) {
|
|---|
| 1461 | i1 -= last;
|
|---|
| 1462 | i1--;
|
|---|
| 1463 | }
|
|---|
| 1464 | if (i2 > last) {
|
|---|
| 1465 | i2 -= last;
|
|---|
| 1466 | i2--;
|
|---|
| 1467 | }
|
|---|
| 1468 |
|
|---|
| 1469 | k -= 4;
|
|---|
| 1470 | ++workDone;
|
|---|
| 1471 | } while (k >= 0);
|
|---|
| 1472 |
|
|---|
| 1473 | return false;
|
|---|
| 1474 | }
|
|---|
| 1475 |
|
|---|
| 1476 | void AllocateCompressStructures()
|
|---|
| 1477 | {
|
|---|
| 1478 | int n = BZip2Constants.BaseBlockSize * blockSize100k;
|
|---|
| 1479 | block = new byte[(n + 1 + BZip2Constants.OvershootBytes)];
|
|---|
| 1480 | quadrant = new int[(n + BZip2Constants.OvershootBytes)];
|
|---|
| 1481 | zptr = new int[n];
|
|---|
| 1482 | ftab = new int[65537];
|
|---|
| 1483 |
|
|---|
| 1484 | if (block == null || quadrant == null || zptr == null || ftab == null) {
|
|---|
| 1485 | // int totalDraw = (n + 1 + NUM_OVERSHOOT_BYTES) + (n + NUM_OVERSHOOT_BYTES) + n + 65537;
|
|---|
| 1486 | // compressOutOfMemory ( totalDraw, n );
|
|---|
| 1487 | }
|
|---|
| 1488 |
|
|---|
| 1489 | /*
|
|---|
| 1490 | The back end needs a place to store the MTF values
|
|---|
| 1491 | whilst it calculates the coding tables. We could
|
|---|
| 1492 | put them in the zptr array. However, these values
|
|---|
| 1493 | will fit in a short, so we overlay szptr at the
|
|---|
| 1494 | start of zptr, in the hope of reducing the number
|
|---|
| 1495 | of cache misses induced by the multiple traversals
|
|---|
| 1496 | of the MTF values when calculating coding tables.
|
|---|
| 1497 | Seems to improve compression speed by about 1%.
|
|---|
| 1498 | */
|
|---|
| 1499 | // szptr = zptr;
|
|---|
| 1500 |
|
|---|
| 1501 |
|
|---|
| 1502 | szptr = new short[2 * n];
|
|---|
| 1503 | }
|
|---|
| 1504 |
|
|---|
| 1505 | void GenerateMTFValues()
|
|---|
| 1506 | {
|
|---|
| 1507 | char[] yy = new char[256];
|
|---|
| 1508 | int i, j;
|
|---|
| 1509 | char tmp;
|
|---|
| 1510 | char tmp2;
|
|---|
| 1511 | int zPend;
|
|---|
| 1512 | int wr;
|
|---|
| 1513 | int EOB;
|
|---|
| 1514 |
|
|---|
| 1515 | MakeMaps();
|
|---|
| 1516 | EOB = nInUse+1;
|
|---|
| 1517 |
|
|---|
| 1518 | for (i = 0; i <= EOB; i++) {
|
|---|
| 1519 | mtfFreq[i] = 0;
|
|---|
| 1520 | }
|
|---|
| 1521 |
|
|---|
| 1522 | wr = 0;
|
|---|
| 1523 | zPend = 0;
|
|---|
| 1524 | for (i = 0; i < nInUse; i++) {
|
|---|
| 1525 | yy[i] = (char) i;
|
|---|
| 1526 | }
|
|---|
| 1527 |
|
|---|
| 1528 |
|
|---|
| 1529 | for (i = 0; i <= last; i++) {
|
|---|
| 1530 | char ll_i;
|
|---|
| 1531 |
|
|---|
| 1532 | ll_i = unseqToSeq[block[zptr[i]]];
|
|---|
| 1533 |
|
|---|
| 1534 | j = 0;
|
|---|
| 1535 | tmp = yy[j];
|
|---|
| 1536 | while (ll_i != tmp) {
|
|---|
| 1537 | j++;
|
|---|
| 1538 | tmp2 = tmp;
|
|---|
| 1539 | tmp = yy[j];
|
|---|
| 1540 | yy[j] = tmp2;
|
|---|
| 1541 | }
|
|---|
| 1542 | yy[0] = tmp;
|
|---|
| 1543 |
|
|---|
| 1544 | if (j == 0) {
|
|---|
| 1545 | zPend++;
|
|---|
| 1546 | } else {
|
|---|
| 1547 | if (zPend > 0) {
|
|---|
| 1548 | zPend--;
|
|---|
| 1549 | while (true) {
|
|---|
| 1550 | switch (zPend % 2) {
|
|---|
| 1551 | case 0:
|
|---|
| 1552 | szptr[wr] = (short)BZip2Constants.RunA;
|
|---|
| 1553 | wr++;
|
|---|
| 1554 | mtfFreq[BZip2Constants.RunA]++;
|
|---|
| 1555 | break;
|
|---|
| 1556 | case 1:
|
|---|
| 1557 | szptr[wr] = (short)BZip2Constants.RunB;
|
|---|
| 1558 | wr++;
|
|---|
| 1559 | mtfFreq[BZip2Constants.RunB]++;
|
|---|
| 1560 | break;
|
|---|
| 1561 | }
|
|---|
| 1562 | if (zPend < 2) {
|
|---|
| 1563 | break;
|
|---|
| 1564 | }
|
|---|
| 1565 | zPend = (zPend - 2) / 2;
|
|---|
| 1566 | }
|
|---|
| 1567 | zPend = 0;
|
|---|
| 1568 | }
|
|---|
| 1569 | szptr[wr] = (short)(j + 1);
|
|---|
| 1570 | wr++;
|
|---|
| 1571 | mtfFreq[j + 1]++;
|
|---|
| 1572 | }
|
|---|
| 1573 | }
|
|---|
| 1574 |
|
|---|
| 1575 | if (zPend > 0) {
|
|---|
| 1576 | zPend--;
|
|---|
| 1577 | while (true) {
|
|---|
| 1578 | switch (zPend % 2) {
|
|---|
| 1579 | case 0:
|
|---|
| 1580 | szptr[wr] = (short)BZip2Constants.RunA;
|
|---|
| 1581 | wr++;
|
|---|
| 1582 | mtfFreq[BZip2Constants.RunA]++;
|
|---|
| 1583 | break;
|
|---|
| 1584 | case 1:
|
|---|
| 1585 | szptr[wr] = (short)BZip2Constants.RunB;
|
|---|
| 1586 | wr++;
|
|---|
| 1587 | mtfFreq[BZip2Constants.RunB]++;
|
|---|
| 1588 | break;
|
|---|
| 1589 | }
|
|---|
| 1590 | if (zPend < 2) {
|
|---|
| 1591 | break;
|
|---|
| 1592 | }
|
|---|
| 1593 | zPend = (zPend - 2) / 2;
|
|---|
| 1594 | }
|
|---|
| 1595 | }
|
|---|
| 1596 |
|
|---|
| 1597 | szptr[wr] = (short)EOB;
|
|---|
| 1598 | wr++;
|
|---|
| 1599 | mtfFreq[EOB]++;
|
|---|
| 1600 |
|
|---|
| 1601 | nMTF = wr;
|
|---|
| 1602 | }
|
|---|
| 1603 |
|
|---|
| 1604 | static void Panic()
|
|---|
| 1605 | {
|
|---|
| 1606 | throw new BZip2Exception("BZip2 output stream panic");
|
|---|
| 1607 | }
|
|---|
| 1608 |
|
|---|
| 1609 | static void HbMakeCodeLengths(char[] len, int[] freq, int alphaSize, int maxLen)
|
|---|
| 1610 | {
|
|---|
| 1611 | /*--
|
|---|
| 1612 | Nodes and heap entries run from 1. Entry 0
|
|---|
| 1613 | for both the heap and nodes is a sentinel.
|
|---|
| 1614 | --*/
|
|---|
| 1615 | int nNodes, nHeap, n1, n2, j, k;
|
|---|
| 1616 | bool tooLong;
|
|---|
| 1617 |
|
|---|
| 1618 | int[] heap = new int[BZip2Constants.MaximumAlphaSize + 2];
|
|---|
| 1619 | int[] weight = new int[BZip2Constants.MaximumAlphaSize * 2];
|
|---|
| 1620 | int[] parent = new int[BZip2Constants.MaximumAlphaSize * 2];
|
|---|
| 1621 |
|
|---|
| 1622 | for (int i = 0; i < alphaSize; ++i)
|
|---|
| 1623 | {
|
|---|
| 1624 | weight[i+1] = (freq[i] == 0 ? 1 : freq[i]) << 8;
|
|---|
| 1625 | }
|
|---|
| 1626 |
|
|---|
| 1627 | while (true)
|
|---|
| 1628 | {
|
|---|
| 1629 | nNodes = alphaSize;
|
|---|
| 1630 | nHeap = 0;
|
|---|
| 1631 |
|
|---|
| 1632 | heap[0] = 0;
|
|---|
| 1633 | weight[0] = 0;
|
|---|
| 1634 | parent[0] = -2;
|
|---|
| 1635 |
|
|---|
| 1636 | for (int i = 1; i <= alphaSize; ++i)
|
|---|
| 1637 | {
|
|---|
| 1638 | parent[i] = -1;
|
|---|
| 1639 | nHeap++;
|
|---|
| 1640 | heap[nHeap] = i;
|
|---|
| 1641 | int zz = nHeap;
|
|---|
| 1642 | int tmp = heap[zz];
|
|---|
| 1643 | while (weight[tmp] < weight[heap[zz >> 1]])
|
|---|
| 1644 | {
|
|---|
| 1645 | heap[zz] = heap[zz >> 1];
|
|---|
| 1646 | zz >>= 1;
|
|---|
| 1647 | }
|
|---|
| 1648 | heap[zz] = tmp;
|
|---|
| 1649 | }
|
|---|
| 1650 | if (!(nHeap < (BZip2Constants.MaximumAlphaSize+2)))
|
|---|
| 1651 | {
|
|---|
| 1652 | Panic();
|
|---|
| 1653 | }
|
|---|
| 1654 |
|
|---|
| 1655 | while (nHeap > 1)
|
|---|
| 1656 | {
|
|---|
| 1657 | n1 = heap[1];
|
|---|
| 1658 | heap[1] = heap[nHeap];
|
|---|
| 1659 | nHeap--;
|
|---|
| 1660 | int zz = 1;
|
|---|
| 1661 | int yy = 0;
|
|---|
| 1662 | int tmp = heap[zz];
|
|---|
| 1663 | while (true)
|
|---|
| 1664 | {
|
|---|
| 1665 | yy = zz << 1;
|
|---|
| 1666 | if (yy > nHeap)
|
|---|
| 1667 | {
|
|---|
| 1668 | break;
|
|---|
| 1669 | }
|
|---|
| 1670 | if (yy < nHeap && weight[heap[yy+1]] < weight[heap[yy]])
|
|---|
| 1671 | {
|
|---|
| 1672 | yy++;
|
|---|
| 1673 | }
|
|---|
| 1674 | if (weight[tmp] < weight[heap[yy]])
|
|---|
| 1675 | {
|
|---|
| 1676 | break;
|
|---|
| 1677 | }
|
|---|
| 1678 |
|
|---|
| 1679 | heap[zz] = heap[yy];
|
|---|
| 1680 | zz = yy;
|
|---|
| 1681 | }
|
|---|
| 1682 | heap[zz] = tmp;
|
|---|
| 1683 | n2 = heap[1];
|
|---|
| 1684 | heap[1] = heap[nHeap];
|
|---|
| 1685 | nHeap--;
|
|---|
| 1686 |
|
|---|
| 1687 | zz = 1;
|
|---|
| 1688 | yy = 0;
|
|---|
| 1689 | tmp = heap[zz];
|
|---|
| 1690 | while (true)
|
|---|
| 1691 | {
|
|---|
| 1692 | yy = zz << 1;
|
|---|
| 1693 | if (yy > nHeap)
|
|---|
| 1694 | {
|
|---|
| 1695 | break;
|
|---|
| 1696 | }
|
|---|
| 1697 | if (yy < nHeap && weight[heap[yy+1]] < weight[heap[yy]])
|
|---|
| 1698 | {
|
|---|
| 1699 | yy++;
|
|---|
| 1700 | }
|
|---|
| 1701 | if (weight[tmp] < weight[heap[yy]])
|
|---|
| 1702 | {
|
|---|
| 1703 | break;
|
|---|
| 1704 | }
|
|---|
| 1705 | heap[zz] = heap[yy];
|
|---|
| 1706 | zz = yy;
|
|---|
| 1707 | }
|
|---|
| 1708 | heap[zz] = tmp;
|
|---|
| 1709 | nNodes++;
|
|---|
| 1710 | parent[n1] = parent[n2] = nNodes;
|
|---|
| 1711 |
|
|---|
| 1712 | weight[nNodes] = (int)((weight[n1] & 0xffffff00) + (weight[n2] & 0xffffff00)) |
|
|---|
| 1713 | (int)(1 + (((weight[n1] & 0x000000ff) > (weight[n2] & 0x000000ff)) ? (weight[n1] & 0x000000ff) : (weight[n2] & 0x000000ff)));
|
|---|
| 1714 |
|
|---|
| 1715 | parent[nNodes] = -1;
|
|---|
| 1716 | nHeap++;
|
|---|
| 1717 | heap[nHeap] = nNodes;
|
|---|
| 1718 |
|
|---|
| 1719 | zz = nHeap;
|
|---|
| 1720 | tmp = heap[zz];
|
|---|
| 1721 | while (weight[tmp] < weight[heap[zz >> 1]])
|
|---|
| 1722 | {
|
|---|
| 1723 | heap[zz] = heap[zz >> 1];
|
|---|
| 1724 | zz >>= 1;
|
|---|
| 1725 | }
|
|---|
| 1726 | heap[zz] = tmp;
|
|---|
| 1727 | }
|
|---|
| 1728 | if (!(nNodes < (BZip2Constants.MaximumAlphaSize * 2)))
|
|---|
| 1729 | {
|
|---|
| 1730 | Panic();
|
|---|
| 1731 | }
|
|---|
| 1732 |
|
|---|
| 1733 | tooLong = false;
|
|---|
| 1734 | for (int i = 1; i <= alphaSize; ++i)
|
|---|
| 1735 | {
|
|---|
| 1736 | j = 0;
|
|---|
| 1737 | k = i;
|
|---|
| 1738 | while (parent[k] >= 0)
|
|---|
| 1739 | {
|
|---|
| 1740 | k = parent[k];
|
|---|
| 1741 | j++;
|
|---|
| 1742 | }
|
|---|
| 1743 | len[i - 1] = (char)j;
|
|---|
| 1744 | if (j > maxLen)
|
|---|
| 1745 | {
|
|---|
| 1746 | tooLong = true;
|
|---|
| 1747 | }
|
|---|
| 1748 | }
|
|---|
| 1749 |
|
|---|
| 1750 | if (!tooLong)
|
|---|
| 1751 | {
|
|---|
| 1752 | break;
|
|---|
| 1753 | }
|
|---|
| 1754 |
|
|---|
| 1755 | for (int i = 1; i < alphaSize; ++i)
|
|---|
| 1756 | {
|
|---|
| 1757 | j = weight[i] >> 8;
|
|---|
| 1758 | j = 1 + (j / 2);
|
|---|
| 1759 | weight[i] = j << 8;
|
|---|
| 1760 | }
|
|---|
| 1761 | }
|
|---|
| 1762 | }
|
|---|
| 1763 |
|
|---|
| 1764 | static void HbAssignCodes (int[] code, char[] length, int minLen, int maxLen, int alphaSize)
|
|---|
| 1765 | {
|
|---|
| 1766 | int vec = 0;
|
|---|
| 1767 | for (int n = minLen; n <= maxLen; ++n)
|
|---|
| 1768 | {
|
|---|
| 1769 | for (int i = 0; i < alphaSize; ++i)
|
|---|
| 1770 | {
|
|---|
| 1771 | if (length[i] == n)
|
|---|
| 1772 | {
|
|---|
| 1773 | code[i] = vec;
|
|---|
| 1774 | ++vec;
|
|---|
| 1775 | }
|
|---|
| 1776 | }
|
|---|
| 1777 | vec <<= 1;
|
|---|
| 1778 | }
|
|---|
| 1779 | }
|
|---|
| 1780 |
|
|---|
| 1781 | static byte Med3(byte a, byte b, byte c )
|
|---|
| 1782 | {
|
|---|
| 1783 | byte t;
|
|---|
| 1784 | if (a > b)
|
|---|
| 1785 | {
|
|---|
| 1786 | t = a;
|
|---|
| 1787 | a = b;
|
|---|
| 1788 | b = t;
|
|---|
| 1789 | }
|
|---|
| 1790 | if (b > c)
|
|---|
| 1791 | {
|
|---|
| 1792 | t = b;
|
|---|
| 1793 | b = c;
|
|---|
| 1794 | c = t;
|
|---|
| 1795 | }
|
|---|
| 1796 | if (a > b)
|
|---|
| 1797 | {
|
|---|
| 1798 | b = a;
|
|---|
| 1799 | }
|
|---|
| 1800 | return b;
|
|---|
| 1801 | }
|
|---|
| 1802 |
|
|---|
| 1803 | struct StackElement
|
|---|
| 1804 | {
|
|---|
| 1805 | public int ll;
|
|---|
| 1806 | public int hh;
|
|---|
| 1807 | public int dd;
|
|---|
| 1808 | }
|
|---|
| 1809 |
|
|---|
| 1810 | #region Instance Fields
|
|---|
| 1811 | bool isStreamOwner = true;
|
|---|
| 1812 |
|
|---|
| 1813 | /*--
|
|---|
| 1814 | index of the last char in the block, so
|
|---|
| 1815 | the block size == last + 1.
|
|---|
| 1816 | --*/
|
|---|
| 1817 | int last;
|
|---|
| 1818 |
|
|---|
| 1819 | /*--
|
|---|
| 1820 | index in zptr[] of original string after sorting.
|
|---|
| 1821 | --*/
|
|---|
| 1822 | int origPtr;
|
|---|
| 1823 |
|
|---|
| 1824 | /*--
|
|---|
| 1825 | always: in the range 0 .. 9.
|
|---|
| 1826 | The current block size is 100000 * this number.
|
|---|
| 1827 | --*/
|
|---|
| 1828 | int blockSize100k;
|
|---|
| 1829 |
|
|---|
| 1830 | bool blockRandomised;
|
|---|
| 1831 |
|
|---|
| 1832 | int bytesOut;
|
|---|
| 1833 | int bsBuff;
|
|---|
| 1834 | int bsLive;
|
|---|
| 1835 | IChecksum mCrc = new StrangeCRC();
|
|---|
| 1836 |
|
|---|
| 1837 | bool[] inUse = new bool[256];
|
|---|
| 1838 | int nInUse;
|
|---|
| 1839 |
|
|---|
| 1840 | char[] seqToUnseq = new char[256];
|
|---|
| 1841 | char[] unseqToSeq = new char[256];
|
|---|
| 1842 |
|
|---|
| 1843 | char[] selector = new char[BZip2Constants.MaximumSelectors];
|
|---|
| 1844 | char[] selectorMtf = new char[BZip2Constants.MaximumSelectors];
|
|---|
| 1845 |
|
|---|
| 1846 | byte[] block;
|
|---|
| 1847 | int[] quadrant;
|
|---|
| 1848 | int[] zptr;
|
|---|
| 1849 | short[] szptr;
|
|---|
| 1850 | int[] ftab;
|
|---|
| 1851 |
|
|---|
| 1852 | int nMTF;
|
|---|
| 1853 |
|
|---|
| 1854 | int[] mtfFreq = new int[BZip2Constants.MaximumAlphaSize];
|
|---|
| 1855 |
|
|---|
| 1856 | /*
|
|---|
| 1857 | * Used when sorting. If too many long comparisons
|
|---|
| 1858 | * happen, we stop sorting, randomise the block
|
|---|
| 1859 | * slightly, and try again.
|
|---|
| 1860 | */
|
|---|
| 1861 | int workFactor;
|
|---|
| 1862 | int workDone;
|
|---|
| 1863 | int workLimit;
|
|---|
| 1864 | bool firstAttempt;
|
|---|
| 1865 | int nBlocksRandomised;
|
|---|
| 1866 |
|
|---|
| 1867 | int currentChar = -1;
|
|---|
| 1868 | int runLength;
|
|---|
| 1869 | uint blockCRC, combinedCRC;
|
|---|
| 1870 | int allowableBlockSize;
|
|---|
| 1871 | Stream baseStream;
|
|---|
| 1872 | bool disposed_;
|
|---|
| 1873 | #endregion
|
|---|
| 1874 | }
|
|---|
| 1875 | }
|
|---|
| 1876 |
|
|---|
| 1877 | /* This file was derived from a file containing this license:
|
|---|
| 1878 | *
|
|---|
| 1879 | * This file is a part of bzip2 and/or libbzip2, a program and
|
|---|
| 1880 | * library for lossless, block-sorting data compression.
|
|---|
| 1881 | *
|
|---|
| 1882 | * Copyright (C) 1996-1998 Julian R Seward. All rights reserved.
|
|---|
| 1883 | *
|
|---|
| 1884 | * Redistribution and use in source and binary forms, with or without
|
|---|
| 1885 | * modification, are permitted provided that the following conditions
|
|---|
| 1886 | * are met:
|
|---|
| 1887 | *
|
|---|
| 1888 | * 1. Redistributions of source code must retain the above copyright
|
|---|
| 1889 | * notice, this list of conditions and the following disclaimer.
|
|---|
| 1890 | *
|
|---|
| 1891 | * 2. The origin of this software must not be misrepresented; you must
|
|---|
| 1892 | * not claim that you wrote the original software. If you use this
|
|---|
| 1893 | * software in a product, an acknowledgment in the product
|
|---|
| 1894 | * documentation would be appreciated but is not required.
|
|---|
| 1895 | *
|
|---|
| 1896 | * 3. Altered source versions must be plainly marked as such, and must
|
|---|
| 1897 | * not be misrepresented as being the original software.
|
|---|
| 1898 | *
|
|---|
| 1899 | * 4. The name of the author may not be used to endorse or promote
|
|---|
| 1900 | * products derived from this software without specific prior written
|
|---|
| 1901 | * permission.
|
|---|
| 1902 | *
|
|---|
| 1903 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
|
|---|
| 1904 | * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
|---|
| 1905 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|---|
| 1906 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
|
|---|
| 1907 | * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|---|
| 1908 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
|
|---|
| 1909 | * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|---|
| 1910 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
|---|
| 1911 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|---|
| 1912 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|---|
| 1913 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|---|
| 1914 | *
|
|---|
| 1915 | * Java version ported by Keiron Liddle, Aftex Software <keiron@aftexsw.com> 1999-2001
|
|---|
| 1916 | */
|
|---|