"""Helpers that read or write a gzipped data stream from/to a raw data file."""

# based on gzip.py file in python distribution 2.5.2

# based on Andrew Kuchling's minigzip.py distributed with the zlib module

from __future__ import print_function

import gzip
import struct, sys
import zlib

# Only GunzipFile is exported by "from <module> import *".
__all__ = ["GunzipFile"]

# Bit masks that HeaderParser tests against the flag byte of a gzip member
# header (the names follow the RFC 1952 FLG bits).
FTEXT, FHCRC, FEXTRA, FNAME, FCOMMENT  = 1, 2, 4, 8, 16
# Pseudo flag, not a real header bit: HeaderParser uses it to mark the fixed
# 10-byte part of the header as the first item still to be parsed.
FHFIXED = 256
# Optional header fields, in the order HeaderParser queues and consumes them.
OPTIONAL_HEADERS = (FEXTRA, FNAME, FCOMMENT, FHCRC)

# Values stored in GunzipFile.mode.
READ, WRITE = 1, 2

def U32(i):
    """Reinterpret a signed 32-bit value as its unsigned counterpart.

    Negative inputs are shifted up by 2**32; non-negative inputs are
    returned unchanged.
    """
    return i + (1 << 32) if i < 0 else i

def LOWU32(i):
    """Keep only the 32 least-significant bits of i (result is >= 0)."""
    low_mask = (1 << 32) - 1
    return i & low_mask

class HeaderParser(object):
   '''Incremental parser for the header of one gzip member.

   The parser consumes bytes from the owning GunzipFile's ``buf``. It can be
   fed in arbitrarily small pieces: update() parses as many header parts as
   are completely available and leaves the rest in the buffer for the next
   call.
   '''
   def __init__(self, gunzipfile):
      self._gunzipfile = gunzipfile
      self._newmember = True
      # Queue of header parts still to be consumed. It starts with the fixed
      # 10-byte part; the optional fields announced by the flag byte are
      # appended once the fixed part has been read.
      self._optionals = [FHFIXED,]

   def _setbuf(self, buf):
      self._gunzipfile.buf = buf

   # The input buffer is shared with (and stored on) the owning GunzipFile.
   buf = property(lambda self: self._gunzipfile.buf, _setbuf)

   def update(self):
      '''Parse as much of the header as the buffer currently allows.

      Returns: whatever the owner's nextparser() returns once the whole
               header has been consumed, otherwise b'' (more input needed).
      Raises:  IOError if the magic number or the compression method is
               not the one expected for gzip/deflate.
      '''
      progressed = True
      while self._optionals and progressed:
         progressed = False
         pending = self._optionals[0]

         if pending == FHFIXED:
            # magic, compression method, flags, mtime, xfl and os: 10 bytes
            if len(self.buf) >= 10:
               (magic, method, flag) = struct.unpack("<2sBB", self.buf[:4])
               if magic !=  b'\037\213':
                  raise IOError('Not a gzipped file')

               if method != 8:
                  raise IOError('Unknown compression method')

               # queue the optional fields that are present, in stream order
               for hdr in OPTIONAL_HEADERS:
                  if hdr & flag:
                     self._optionals.append(hdr)
               self._nextfield(10)
               progressed = True

         elif pending == FEXTRA:
            # 2-byte little-endian length followed by that many bytes
            if len(self.buf) >= 2:
               (xlen,) = struct.unpack('<H', self.buf[:2])
               fieldlen = xlen + 2
               if fieldlen <= len(self.buf):
                  self._nextfield(fieldlen)
                  progressed = True

         elif pending in (FNAME, FCOMMENT):
            # zero-terminated string; wait until the terminator has arrived
            end = self.buf.find(b'\000')
            if end >= 0:
               self._nextfield(end + 1)
               progressed = True

         elif pending == FHCRC:
            # 2-byte header CRC, skipped without being verified
            if len(self.buf) >= 2:
               self._nextfield(2)
               progressed = True

      rawdata = b''
      if not self._optionals:
         # Header is already parsed, move to dataparser
         rawdata = self._gunzipfile.nextparser()

      return rawdata

   def _nextfield(self, size):
      '''Drop ``size`` consumed bytes from the buffer and dequeue the
         header part that has just been parsed.
      '''
      self.buf = self.buf[size:]
      self._optionals.pop(0)

   def close(self):
      '''Raise IOError when the stream ends in the middle of a header.

      Ending before the fixed part of a new member has been parsed is
      accepted.
      '''
      if self._optionals and self._optionals[0] != FHFIXED:
         raise IOError('Unexpected EOF')

class GzipDataParser(object):
   '''Incremental parser for the deflate data and the 8-byte trailer of one
      gzip member. Input is taken from the owning GunzipFile's ``buf``.
   '''
   def __init__(self, gunzipfile):
      self._gunzipfile = gunzipfile
      # Negative wbits: raw deflate stream, the gzip framing is handled here.
      self._decompress =  zlib.decompressobj(-zlib.MAX_WBITS)
      # Set once the end of the deflate stream has been seen.
      self._decompressdone = False
      # Running length and CRC-32 of the uncompressed data produced so far.
      self.size = 0
      self.crc = zlib.crc32(b"")

   def _setbuf(self, buf):
      self._gunzipfile.buf = buf

   # The input buffer is shared with (and stored on) the owning GunzipFile.
   buf = property(lambda self: self._gunzipfile.buf, _setbuf)

   def update(self):
      '''Decompress the buffered input.

      Returns: the uncompressed bytes produced by this call. When the
               trailer is complete as well, this includes whatever the
               owner's newmember() returns for the data following it.
      Raises:  IOError if the trailer does not match the data.
      '''
      rawdata = b''
      if self._decompressdone:
         # Only (the rest of) the trailer is still missing.
         rawdata += self._checkend()
         return rawdata

      if sys.version_info[0] >= 3:
         rawdata = self._decompress.decompress(self.buf)
      else:
         rawdata = self._decompress.decompress(buffer(self.buf))

      self.crc =  zlib.crc32(rawdata, self.crc)
      self.size = self.size + len(rawdata)
      leftover = self._decompress.unused_data
      if leftover:
         # Bytes past the end of the deflate stream: keep them, they are the
         # start of the trailer (and possibly of the next member).
         self.buf = self.buf[-len(leftover):]
         self._decompressdone = True
         rawdata += self._checkend()
      else:
         # Everything was consumed by the decompressor.
         self.buf = bytearray()
      return rawdata

   def close(self):
      '''Always raises EOFError: while this parser is the active one the
         member has not been completely received.
      '''
      raise  EOFError('Reached EOF')

   def _checkend(self):
      '''Verify the trailer once all 8 bytes are buffered, then let the
         owning GunzipFile prepare for the next member.

      Returns: the result of the owner's newmember(), or b'' while the
               trailer is still incomplete.
      '''
      if len(self.buf) >= 8:
         self._checkcrc()
         return self._gunzipfile.newmember()
      else:
         return b''

   def _checkcrc(self):
      '''Consume the trailer (CRC-32 and ISIZE) and compare it with the
         values accumulated while decompressing.

      Raises: IOError on a CRC or length mismatch.
      '''
      # Both fields are unsigned 32-bit little-endian values. Reading ISIZE
      # as a signed value would reject every member whose length modulo
      # 2**32 is 2**31 or more.
      crc32, isize = struct.unpack('<LL', self.buf[:8])
      self.buf = self.buf[8:]

      # zlib.crc32() may return a signed value on Python 2, hence LOWU32.
      if crc32 != LOWU32(self.crc):
         raise IOError("CRC check failed")
      elif isize != LOWU32(self.size):
         raise IOError("Incorrect length of data produced")

class GunzipFile(object):
   '''File-like wrapper around an *uncompressed* data file or file object.

      In read mode, read() returns the content of the underlying file as a
      gzip compressed data stream. In write mode, write() accepts a gzip
      compressed data stream and stores the uncompressed data in the
      underlying file.
   '''
   # Upper bound and starting point for the size of the chunks read from
   # the underlying file; the chunk size is doubled on every refill.
   MAX_READ_SIZE = 1024 * 1024 * 10
   START_READ_SIZE = 1024

   def __init__(self, filename=None, mode=None,
                 compresslevel=9, fileobj=None):
      '''Parameters:
            * filename      - Path of the raw data file; only used to open
                              the file when fileobj is not given.
            * mode          - 'r...' to produce a gzip stream from the raw
                              file, 'w...' or 'a...' to store the unzipped
                              data of a gzip stream. Defaults to the mode of
                              fileobj, or to 'rb'.
            * compresslevel - zlib compression level used in read mode.
            * fileobj       - An already opened binary file object. It is
                              not closed by close().
         Raises:
            * IOError - If the mode is not supported.
      '''
      self.myfileobj = None
      # guarantee the file is opened in binary mode on platforms
      # that care about that sort of thing
      if mode and 'b' not in mode:
         mode += 'b'
      if fileobj is None:
         fileobj = self.myfileobj = open(filename, mode or 'rb')

      if filename is None:
         filename = getattr(fileobj, 'name', '')

      if mode is None:
         mode = getattr(fileobj, 'mode', 'rb')

      self.parsers = []
      direction = mode[0:1]
      if direction == 'r':
         self.mode = READ
         self.size = 0
         self.crc = zlib.crc32(b"")

         # Fixed gzip header, no time and file name info. The output buffer
         # starts out holding it.
         header = b''.join([b'\037\213\010',        # magic, method: deflate
                            b'\x00',                # flags: nothing optional
                            struct.pack("<L", 0),   # modification time: none
                            b'\002\377'])           # extra flags, OS: unknown
         self.buf = bytearray(header)

         self._compress = zlib.compressobj(compresslevel,
                                           zlib.DEFLATED,
                                           -zlib.MAX_WBITS,
                                           zlib.DEF_MEM_LEVEL,
                                           0)
      elif direction in ('w', 'a'):
         self.mode = WRITE
         self.buf = bytearray()
         self.newmember()

      else:
         # Do not leak the file that was opened above.
         if self.myfileobj is not None:
            self.myfileobj.close()
            self.myfileobj = None
         raise IOError("Mode " + mode + " not supported")

      self.fileobj = fileobj

   @property
   def parser(self):
      '''The parser currently in charge, or None if the queue is empty.'''
      return self.parsers[0] if self.parsers else None

   def write(self, data):
      '''Feed a piece of a gzipped data stream; the uncompressed data that
         becomes available is written to the underlying file.
         Raises:
            * IOError - If the object was opened for reading, or if the
                        stream is not valid gzip data.
      '''
      if self.mode != WRITE:
         import errno
         raise IOError(errno.EBADF, "write() on read-only GzipFile object")

      assert self.parser is not None, 'No parser in the queue'

      self.buf += data
      rawdata = self.parser.update()
      if rawdata:
         self.fileobj.write(rawdata)

   def read(self, size=-1):
      '''Return the gzipped data stream of the raw data file. The gzip header
         doesn't have time and file name info.
         Parameters:
            * size - The size to read from the stream. If negative or not
                     specified, return the whole remaining stream.
         Returns: At most size bytes; an empty result at the end of the
                  stream.
         Raises:
            * IOError - If the object was opened for writing.
      '''
      if self.mode != READ:
         import errno
         raise IOError(errno.EBADF, "read() on write-only GzipFile object")

      if not self.buf and self.fileobj is None:
         return b''

      readsize = self.START_READ_SIZE
      try:
         # Refill until enough compressed data is buffered. _read() raises
         # EOFError once the raw file is exhausted and the trailer is out.
         while size < 0 or size > len(self.buf):
            readsize = min(self.MAX_READ_SIZE, readsize * 2)
            self._read(readsize)
      except EOFError:
         pass

      if size < 0:
         size = len(self.buf)

      # Slicing clamps to what is buffered, so a short final chunk is fine.
      chunk = self.buf[:size]
      del self.buf[:size]
      return chunk

   def _read(self, size=1024):
      '''Compress up to size more bytes of the raw file into the buffer. At
         the end of the raw file the compressor is flushed and the gzip
         trailer is appended.
         Raises:
            * EOFError - If the raw file has already been exhausted.
      '''
      if self.fileobj is None:
         raise EOFError("Reached EOF")

      data = self.fileobj.read(size)
      if data:
         self.size += len(data)
         self.crc = zlib.crc32(data, self.crc)
         self.buf += self._compress.compress(data)
      else:
         self.buf += self._compress.flush()
         self.fileobj = None
         # Adding trailer
         self.buf += struct.pack("<LL", LOWU32(self.crc), LOWU32(self.size))

   def close(self):
      '''Finish the stream and close the underlying file if it was opened
         by this object.
         Raises: In write mode, whatever the active parser's close() raises
                 for an incomplete gzip stream. The file is released even
                 in that case.
      '''
      try:
         if self.mode == WRITE:
            self.parser.close()
      finally:
         self.fileobj = None
         if self.myfileobj:
            self.myfileobj.close()
            self.myfileobj = None

   def newmember(self):
      '''Pop current parser and append parsers for next member and start
         parsing.
         Returns: ungzip raw data
      '''
      if self.parsers:
         self.parsers.pop(0)
      self.parsers.extend([HeaderParser(self), GzipDataParser(self)])
      return self.parser.update()

   def nextparser(self):
      '''Pop current parser, start parsing if there is a parser in the queue
         Returns: ungzip raw data (b'' if no parser is left)
      '''
      assert self.parser
      self.parsers.pop(0)
      if self.parser:
         return self.parser.update()
      return b''

class GzipFile(gzip.GzipFile):
   '''gzip.GzipFile variant that always writes the same fixed member header.
   '''
   def _write_gzip_header(self, compresslevel=None):
      '''Write the static 10-byte header to the underlying file object.

      compresslevel is accepted and ignored: newer versions of the standard
      gzip module pass it to this hook, older ones call it without
      arguments.
      '''
      # Static header for ESX gzip format: 2-byte gzip magic + compression
      # method (8, "deflate") + flags (0) + 4-byte timestamp (0's) + extra
      # flags (2, max compression) + OS (255, unknown). All optional fields are
      # intentionally omitted. This enables re-compressing data and getting
      # consistent output that can be matched against a digest.
      self.fileobj.write(b"\037\213\010\000\000\000\000\000\002\377")

def getStandardIOasBytes(iostream):
    """Return the object to use for byte-oriented I/O on iostream.

    On Python 3 this is the stream's ``buffer`` attribute; on Python 2 the
    stream itself is returned.
    """
    running_py3 = sys.version_info[0] >= 3
    return iostream.buffer if running_py3 else iostream

def _test():
    """Command-line self test: act like gunzip; with -z, act like gzip.

    Every remaining argument is a file name, "-" meaning standard input and
    output; without any argument "-" is assumed. When compressing FILE the
    result goes to FILE.gz; when decompressing FILE.gz it goes to FILE. The
    input file is not deleted, however, nor are any other gzip options or
    features supported.
    """
    args = sys.argv[1:]
    compress = bool(args) and args[0] == "-z"
    if compress:
        args = args[1:]
    if not args:
        args = ["-"]

    for arg in args:
        use_stdio = arg == "-"
        if compress:
            if use_stdio:
                f = GunzipFile(filename="", mode="rb",
                               fileobj=getStandardIOasBytes(sys.stdin))
                g = getStandardIOasBytes(sys.stdout)
            else:
                f = GunzipFile(arg, "rb")
                g = open(arg + '.gz', "wb")
        else:
            if use_stdio:
                f = getStandardIOasBytes(sys.stdin)
                g = GunzipFile(filename="", mode="wb",
                               fileobj=getStandardIOasBytes(sys.stdout))
            else:
                if arg[-3:] != ".gz":
                    print("filename doesn't end in .gz: %s" % repr(arg))
                    continue
                f = open(arg, "rb")
                g = GunzipFile(arg[:-3], "wb")

        # The process-wide standard streams must stay open for later
        # arguments. On Python 3 they are handed out as sys.stdin.buffer and
        # sys.stdout.buffer, so an identity check against sys.stdin or
        # sys.stdout would not recognise them; remember where they are used
        # instead. GunzipFile objects wrapping a standard stream are still
        # closed: that validates the stream without closing the wrapped file.
        g_is_raw_stdout = compress and use_stdio
        f_is_raw_stdin = use_stdio and not compress

        try:
            while True:
                # Use 7 here to make sure that _checkend is fully covered in
                # test.
                chunk = f.read(7)
                if not chunk:
                    break
                g.write(chunk)
        finally:
            try:
                if g_is_raw_stdout:
                    g.flush()
                else:
                    g.close()
            finally:
                if not f_is_raw_stdin:
                    f.close()

if __name__ == '__main__':
    _test()

