#!/usr/bin/python
########################################################################
# Copyright (C) 2010-2021 VMware, Inc.                                 #
# All Rights Reserved                                                  #
########################################################################

"Parse vmtar format."

import gzip
import struct
import os
import sys

from .Misc import byteToStr, isString, seekable

# Chunk size, in bytes, used when copying member data out of the archive.
BUFFER_SIZE = 2048

# One-byte values of the header's type field (header bytes 156:157), kept as
# bytes so they can be compared directly against a slice of the raw header.
TAR_TYPE_FILE         = b'0'
TAR_TYPE_LINK         = b'1'
TAR_TYPE_SYMLINK      = b'2'
TAR_TYPE_CHARDEV      = b'3'
TAR_TYPE_BLOCKDEV     = b'4'
TAR_TYPE_DIR          = b'5'
TAR_TYPE_FIFO         = b'6'
TAR_TYPE_SHAREDFILE   = b'7'
# GNU extensions: the data following such a header is a long link target (K)
# or a long member name (L) that applies to the next real header.
TAR_TYPE_GNU_LONGLINK = b'K'
TAR_TYPE_GNU_LONGNAME = b'L'

# Type codes that VmTarInfo.isreg() reports as regular files.
TAR_REGULAR_TYPES = (TAR_TYPE_FILE, TAR_TYPE_SHAREDFILE)

def StrFromNTS(data):
   """Convert a NUL-terminated header field to a string.
         Parameters:
            * data - The raw bytes of a fixed-width header field.
         Returns: byteToStr() of everything before the first NUL byte, or of
                  the whole field when it contains no NUL.
   """
   terminator = data.find(b"\x00")
   # A field without a terminator fills its whole width.
   payload = data if terminator == -1 else data[:terminator]
   return byteToStr(payload)

class VmTarError(Exception):
   "Raised when vmtar data is malformed or cannot be processed."

class VmTarInfo(object):
   """Class to represent one member of a vmtar archive.

      Instances carry the fields of one 512-byte header. The vmtar-specific
      fields offset, textoffset, textsize and numfixuppgs are read from the
      last 16 bytes of the header as packed little-endian integers.
   """
   MAGIC = b"visor "
   VERSION = "40"

   def __init__(self, name=""):
      self.name = name
      self.mode = 0o644
      self.uid = 0
      self.gid = 0
      self.size = 0
      self.mtime = 0
      self.chksum = 0
      self.type = TAR_TYPE_FILE
      self.linkname = ""
      self.uname = ""
      self.gname = ""
      self.devmajor = 0
      self.devminor = 0
      self.prefix = ""
      self.offset = 0
      self.textoffset = 0
      self.textsize = 0
      self.numfixuppgs = 0

   def isreg(self):
      "Return True if the member's type is one of TAR_REGULAR_TYPES."
      return self.type in TAR_REGULAR_TYPES

   def isfile(self):
      "Alias of isreg()."
      return self.isreg()

   @staticmethod
   def _ParseId(field):
      """Decode an 8-byte uid/gid header field.
            Parameters:
               * field - The 8 raw bytes of the field.
            Returns: The value as an int. The field is normally an octal
                     string; if it does not parse as one, bytes 1..7 are read
                     as a big-endian binary number instead (byte 0 is
                     skipped).
      """
      try:
         return int(StrFromNTS(field) or "0", 8)
      except ValueError:
         value = 0
         for pos in range(1, 8):
            # Slicing (rather than indexing) yields a length-1 bytes object on
            # both Python 2 and 3, which ord() accepts.
            value = (value << 8) + ord(field[pos:pos + 1])
         return value

   @classmethod
   def FromBuf(cls, buf):
      """Initialize VmTarInfo object from raw vmtar header.
            Parameters:
               * buf - a 512-byte string.
            Returns: A new VmTarInfo object, or None if end of headers. End
                     of headers is signalled by a buffer that starts with a
                     NUL byte, or by an empty buffer (the stream ended
                     without a terminating block).
            Raises:
               * VmTarError - If the buffer is shorter than 512 bytes, or the
                              tar magic or checksum are incorrect.
               * ValueError - If a numeric field is not valid octal.
      """
      # An empty read used to hit buf[0] and raise IndexError; treat it as the
      # end of the header list. The slice comparison behaves the same on
      # Python 2 and 3, so no interpreter version check is needed.
      if not buf or buf[:1] == b"\x00":
         return None

      if len(buf) < 512:
         raise VmTarError("Header too short")
      if buf[257:263] != cls.MAGIC:
         raise VmTarError("Invalid header magic")
      # The two version bytes at 263:265 are not validated.

      chksum = int(StrFromNTS(buf[148:156]), 8)
      # The checksum covers every header byte except the checksum field
      # itself (148:156), which is counted as 256 in total (eight ASCII
      # spaces, 8 * 32). Accept the sum computed over either unsigned or
      # signed bytes.
      unsignedSum = sum(struct.unpack("148B", buf[:148])) + 256 + \
                    sum(struct.unpack("356B", buf[156:512]))
      signedSum = sum(struct.unpack("148b", buf[:148])) + 256 + \
                  sum(struct.unpack("356b", buf[156:512]))
      if chksum not in (unsignedSum, signedSum):
         raise VmTarError("Invalid header checksum")

      obj = cls(StrFromNTS(buf[:100]))
      obj.mode = int(StrFromNTS(buf[100:108]) or "0", 8)
      obj.uid = cls._ParseId(buf[108:116])
      obj.gid = cls._ParseId(buf[116:124])
      obj.size = int(StrFromNTS(buf[124:136]) or "0", 8)
      obj.mtime = int(StrFromNTS(buf[136:148]) or "0", 8)
      obj.chksum = chksum
      obj.type = buf[156:157]
      obj.linkname = StrFromNTS(buf[157:257])
      obj.uname = StrFromNTS(buf[265:297])
      obj.gname = StrFromNTS(buf[297:329])
      obj.devmajor = int(StrFromNTS(buf[329:337]) or "0", 8)
      obj.devminor = int(StrFromNTS(buf[337:345]) or "0", 8)
      # Unlike the other text fields, prefix is stored as the raw header
      # slice (not NUL-trimmed or decoded).
      obj.prefix = buf[345:496]
      # GNU makes every numeric field a human-readable octal. vmtar breaks that
      # convention, and uses packed values for these.
      obj.offset = struct.unpack("<I", buf[496:500])[0]
      obj.textoffset = struct.unpack("<I", buf[500:504])[0]
      obj.textsize = struct.unpack("<I", buf[504:508])[0]
      obj.numfixuppgs = struct.unpack("<I", buf[508:512])[0]
      return obj

class VmTar(object):
   """Read-only accessor for a vmtar archive.

      All member headers are parsed when the object is constructed and kept
      in self.members as VmTarInfo objects. The object can be used as a
      context manager, which closes it on exit.
   """
   def __init__(self, name=None, mode="r|gz", fileobj=None):
      """Class constructor.
            Parameters:
               * name    - The name of a file. May be None if fileobj is
                           specified.
               * mode    - The file mode. Currently only reading is supported.
                           May also append |gz to specify that file or stream
                           is gzip-compressed. Defaults to r|gz.
               * fileobj - Read from a file-like object instead of the file
                           given by name.
            Raises:
               * IOError    - If file cannot be opened or an unsupported mode
                              is given.
               * VmTarError - The data is not in the correct format, or both
                              name and fileobj are empty.
      """
      compression = None
      # Only a single "<mode>|<compression>" pair is recognised; any other
      # shape is passed through untouched for open() to judge.
      parts = mode.split("|")
      if len(parts) == 2:
         mode, compression = parts

      if "b" not in mode:
         mode += "b"

      if not fileobj and not name:
         raise VmTarError("Name and fileobj cannot be both empty.")

      # Refuse write-style modes before opening anything: open(name, "wb")
      # would truncate the archive and only then fail on the first read.
      if "r" not in mode:
         raise IOError("Unsupported mode %r: only reading is supported" % mode)

      # Track the object created here so that it, and only it, is closed if
      # header parsing fails. A caller-supplied fileobj is never closed here.
      opened = None
      if name and compression == "gz":
         fileobj = opened = gzip.GzipFile(filename=name, mode=mode)
      elif name:
         fileobj = opened = open(name, mode)
      elif fileobj and compression == "gz":
         fileobj = opened = gzip.GzipFile(mode=mode, fileobj=fileobj)

      self._fileobj = fileobj
      self.members = list()

      try:
         self._ReadHeaders()
      except Exception:
         if opened is not None:
            opened.close()
         raise

   def __enter__(self):
      return self

   def __exit__(self, excType, excValue, traceback):
      self.close()

   def _ReadLongData(self, size):
      """Read the payload of a GNU longname/longlink entry.
            Parameters:
               * size - The size field of the longname/longlink header.
            Returns: The payload bytes without their final byte (the size
                     includes a terminating NUL).
      """
      # Always consume whole 512-byte blocks, and at least one. Payloads
      # longer than one block are assumed to follow the GNU tar layout of
      # consecutive padded blocks -- NOTE(review): confirm against vmtar
      # writers.
      nblocks = max(1, (size + 511) // 512)
      return self._fileobj.read(nblocks * 512)[:size - 1]

   def _ReadHeaders(self):
      """Parse every header from the start of the stream into self.members.
            Raises:
               * VmTarError - If a header is malformed, or the archive ends
                              right after a long name/link entry.
      """
      hdr = VmTarInfo.FromBuf(self._fileobj.read(512))
      while hdr is not None:
         # If hdr type indicates longlink or longname, then the data
         # following the header contains the long link name or file name. Note
         # that there can be a sequence of
         #    longlink hdr + longlink + longnamehdr + longname
         # in cases where both a link target and link source are long names.
         longlink = None
         longname = None
         while hdr.type in (TAR_TYPE_GNU_LONGLINK, TAR_TYPE_GNU_LONGNAME):
            data = self._ReadLongData(hdr.size)
            if hdr.type == TAR_TYPE_GNU_LONGLINK:
               longlink = data
            else:
               longname = data
            hdr = VmTarInfo.FromBuf(self._fileobj.read(512))
            if hdr is None:
               # A long name/link must be followed by the header it names.
               raise VmTarError("Archive ends after a long name/link entry")
         if longlink is not None:
            hdr.linkname = byteToStr(longlink)
         if longname is not None:
            hdr.name = byteToStr(longname)
         self.members.append(hdr)
         hdr = VmTarInfo.FromBuf(self._fileobj.read(512))

   def getmember(self, name):
      "Return the first member whose name equals name, or None."
      for info in self.members:
         if info.name == name:
            return info
      return None

   def getmembers(self):
      "Return the list of members (the internal list, not a copy)."
      return self.members

   def getnames(self):
      "Return the member names, in archive order."
      return [info.name for info in self.members]

   def extract(self, member, path=""):
      """Extract a member from the archive to the current working directory,
         using its full name or its VmTarInfo object. A directory can be
         specified as the extract location.
            Parameters:
               * member - A member name or a VmTarInfo object.
               * path   - Directory to extract into; the member name is
                          joined onto it.
            Raises:
               * ValueError          - The named member does not exist, or
                                       the member has size 0.
               * NotImplementedError - The member is not a regular file.
               * OSError             - The underlying file object is closed
                                       or not seekable.
               * VmTarError          - The archive ends before the member's
                                       data is complete.
      """
      if isString(member):
         tarInfo = self.getmember(member)
         if tarInfo is None:
            raise ValueError("Member %s is not found" % member)
      else:
         tarInfo = member

      if not tarInfo.isreg():
         raise NotImplementedError("Only regular file is supported for extract")
      # Check for a closed file before probing seekability, so a closed
      # archive is reported as such rather than by whatever the probe does
      # with a closed object.
      if self._fileobj.closed:
         raise OSError("File is already closed")
      if not seekable(self._fileobj):
         raise OSError("File object is not seekable")
      if tarInfo.size == 0:
         raise ValueError("File length is 0")

      # NOTE(review): the member name is joined onto path unchecked, so an
      # absolute name or one containing ".." is written outside path. Only
      # extract archives from trusted sources.
      targetPath = os.path.join(path, tarInfo.name)

      # Create upper directories.
      upperDir = os.path.dirname(targetPath)
      if upperDir and not os.path.exists(upperDir):
         os.makedirs(upperDir)

      self._fileobj.seek(tarInfo.offset)
      try:
         with open(targetPath, 'wb') as destObj:
            remaining = tarInfo.size
            while remaining > 0:
               chunk = self._fileobj.read(min(BUFFER_SIZE, remaining))
               if not chunk:
                  # Count what was actually read; an early EOF must not
                  # silently produce a short output file.
                  raise VmTarError("Unexpected end of archive while "
                                   "extracting %s" % tarInfo.name)
               destObj.write(chunk)
               remaining -= len(chunk)
      finally:
         # Rewind even on failure so the object stays in a known position.
         self._fileobj.seek(0)

   def close(self):
      "Close the underlying file object."
      self._fileobj.close()

def Open(name=None, mode="r", fileobj=None):
   """Open a vmtar archive and return a VmTar object.
         Parameters:
            * name    - The name of a file. May be None if fileobj is
                        specified.
            * mode    - The file mode, optionally suffixed with |gz.
            * fileobj - A file-like object to read from instead of name.
         Returns: A VmTar object.

      When no fileobj is given and mode does not already end in |gz, the
      archive is first opened as gzip-compressed; if that raises IOError it
      is opened again with the mode exactly as given.
   """
   probeGzip = fileobj is None and not mode.endswith("|gz")
   if not probeGzip:
      return VmTar(name, mode, fileobj)
   try:
      return VmTar(name, "%s|gz" % mode, fileobj)
   except IOError:
      return VmTar(name, mode, fileobj)

if __name__ == "__main__":
   # Command-line use: print the member names of every archive given as an
   # argument, one name per line.
   for archivePath in sys.argv[1:]:
      archive = Open(archivePath)
      sys.stdout.writelines("%s\n" % memberName
                            for memberName in archive.getnames())
      archive.close()

