#!/usr/bin/python
########################################################################
# Copyright (C) 2011-2018 VMWare, Inc.
# All Rights Reserved
########################################################################
#
# This module implements some work-arounds for broken HTTPS proxy support in
# the httplib and urllib2 modules included with the Python distribution.
# See http://bugs.python.org/issue1424152 for more information.
#
# Additionally, getproxies() in urllib is broken on Windows:
# 1. If the 'Use the same proxy server for all protocols' option is checked in
#    Internet Options -> Local Area Network (LAN) Settings -> Proxy Settings,
#    then proxies are returned for 'ftp' and 'http' protocols, but not 'https'.
#    Maybe the author at the time realized that HTTPS proxy support was
#    broken, and so didn't bother? Who knows.
# 2. If the 'Use the same proxy server for all protocols' option is not
#    checked, then getproxies() will return a proxy for the 'https'
#    protocol (assuming one was configured). However, it will return
#    the proxy URL as https://proxy.vmware.com:3128 when it should be
#    http://proxy.vmware.com:3128.
#
# We try to fix these issues by re-defining getproxies_registry() in urllib
# on Windows platforms.
#
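# This code has a lot of dependencies on implementation-specific things in
# httplib, urllib and urllib2 (http.client and urllib.request on Python 3),
# and is likely to break if/when they are updated.
# It was originally written against Python versions 2.4 - 2.6; code paths for
# Python 3 have since been added alongside the Python 2 ones.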

import os
import socket
import sys

if sys.version_info[0] >= 3:
   from urllib.request import (addinfourl, AbstractHTTPHandler, URLError,
                               build_opener)
   from urllib.request import ProxyHandler as _ProxyHandler
   import urllib.request

   from http.client import error as httpError
   from http.client import HTTPSConnection

else:
   from urllib import addinfourl
   import urllib
   from urllib2 import AbstractHTTPHandler, URLError, build_opener
   from urllib2 import ProxyHandler as _ProxyHandler

   from httplib import error as httpError
   from httplib import HTTPSConnection, FakeSocket

import re

# Matches an explicit "scheme://" prefix at the start of a proxy address.
_PROXY_SCHEME_RE = re.compile('^([^/:]+)://')

def _parse_proxy_server(proxyServer):
   '''Convert a Windows 'ProxyServer' registry value into a proxies dict.

      Two formats are understood:
       * 'host:port' - one proxy for all protocols; it is returned for
         'http', 'ftp' and 'https'.
       * 'proto=host:port;proto=host:port;...' - per-protocol proxies.

      An address that carries no 'scheme://' prefix is given 'http://',
      because the proxy itself is always spoken to over plain HTTP, even for
      the 'https' protocol.  Entries that are empty or malformed (for
      example the one produced by a trailing ';') are skipped rather than
      aborting the whole parse.

      Returns a dict mapping protocol name to proxy URL; it is empty when
      proxyServer contains no usable entry.
   '''
   proxies = {}
   if '=' in proxyServer:
      for entry in proxyServer.split(';'):
         if '=' not in entry:
            continue
         protocol, address = entry.split('=', 1)
         protocol = protocol.strip()
         address = address.strip()
         if not protocol or not address:
            continue
         if not _PROXY_SCHEME_RE.match(address):
            # This line is one fix:
            address = 'http://%s' % address
         proxies[protocol] = address
   else:
      address = proxyServer.strip()
      if address:
         # These lines are the other fix:
         if not _PROXY_SCHEME_RE.match(address):
            address = 'http://%s' % address
         for key in ('http', 'ftp', 'https'):
            proxies[key] = address
   return proxies

if os.name == 'nt':
   def getproxies_registry():
      '''Return the proxies configured in the Windows Internet Settings.

         Based on urllib.py in Python 2.6.1, with the fixes implemented by
         _parse_proxy_server().  This is a best-effort lookup: if the
         registry module is unavailable or the settings cannot be read, an
         empty dict is returned instead of raising.
      '''
      proxies = {}
      # The registry module is called 'winreg' on Python 3 and '_winreg' on
      # Python 2.
      try:
         import winreg
      except ImportError:
         try:
            import _winreg as winreg
         except ImportError:
            return proxies
      try:
         internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER,
            r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
         try:
            proxyEnable = winreg.QueryValueEx(internetSettings,
                                              'ProxyEnable')[0]
            if proxyEnable:
               proxyServer = str(winreg.QueryValueEx(internetSettings,
                                                     'ProxyServer')[0])
               proxies = _parse_proxy_server(proxyServer)
         finally:
            # Release the key even when one of the queries fails.
            internetSettings.Close()
      except (OSError, ValueError, TypeError):
         # Deliberately ignored: missing or unreadable settings simply mean
         # "no proxies configured".  (WindowsError is an OSError.)
         pass
      return proxies
   # Note: This will replace the function for everyone who imports urllib, not
   #       just this module.
   if sys.version_info[0] >= 3:
      urllib.request.getproxies_registry = getproxies_registry
   else:
      urllib.getproxies_registry = getproxies_registry

class ConnectProxyError(httpError):
   '''Raised when the proxy does not grant the CONNECT tunnel request.'''

class HTTPSProxyConnection(HTTPSConnection):
   '''Like httplib.HTTPSConnection, but we establish a tunnel through a proxy
      first.

      The proxy is given as 'host:port', optionally prefixed with 'http://'
      (the only supported scheme).  When no proxy is given the class behaves
      like a plain HTTPSConnection.
   '''
   # Upper bound on how much of the proxy's reply to CONNECT is buffered
   # while looking for the end of its headers.
   _MAX_PROXY_REPLY = 65536

   def __init__(self, host, port=None, key_file=None, cert_file=None,
                strict=None, proxy=None):
      '''Parameters:
            * host      - Remote host, optionally as 'host:port'.
            * port      - Remote port; defaults to 443 when it is not part
                          of host either.
            * key_file  - Optional client private key file.
            * cert_file - Optional client certificate file.
            * strict    - Passed to HTTPSConnection on Python 2; ignored on
                          Python 3, where the option no longer exists.
            * proxy     - 'host:port' or 'http://host:port' of the proxy, or
                          None for a direct connection.
         Raises:
            * ValueError - If proxy names a scheme other than 'http'.
      '''
      if sys.version_info[0] >= 3:
         # http.client dropped 'strict' in 3.4 (passing it positionally
         # would set the timeout instead) and 'key_file'/'cert_file' in
         # 3.12, so a client certificate is supplied through an SSL context.
         context = None
         if cert_file:
            import ssl
            context = ssl.create_default_context()
            context.load_cert_chain(cert_file, key_file)
         HTTPSConnection.__init__(self, host, port, context=context)
         self.key_file = key_file
         self.cert_file = cert_file
      else:
         HTTPSConnection.__init__(self, host, port, key_file, cert_file,
                                  strict)
      if proxy and proxy.count('://'):
         proto, proxy = proxy.split('://', 1)
         if proto != 'http':
            raise ValueError("Unknown proxy protocol '%s'" % proto)
      if proxy:
         # Proxy URLs taken from the environment often end in '/'.
         proxy = proxy.rstrip('/')
      self.proxy = proxy
      self.hostport = host

   def _tunnel_target(self):
      '''Return the 'host:port' string to request in the CONNECT line.'''
      # self.host / self.port were filled in by the base class, which also
      # applies the default port and honours an explicit port argument.
      host = self.host
      if ':' in host and not host.startswith('['):
         # Bare IPv6 literal: brackets are needed next to a port.
         host = '[%s]' % host
      return '%s:%d' % (host, self.port)

   def _read_proxy_status(self, sock):
      '''Read the proxy's reply to CONNECT from sock and return its status
         code as a string ('' when no status line was received).
      '''
      # Consume the complete header block.  Anything left unread would
      # otherwise be mistaken for the start of the TLS handshake.  The
      # remote end does not send anything through the tunnel before we do,
      # so reading in chunks cannot swallow tunnelled data.
      reply = b''
      while len(reply) < self._MAX_PROXY_REPLY:
         if b'\r\n\r\n' in reply or b'\n\n' in reply:
            break
         chunk = sock.recv(1024)
         if not chunk:
            break
         reply += chunk
      fields = reply.split(None, 2)
      if len(fields) < 2:
         return ''
      code = fields[1]
      if not isinstance(code, str):
         # Python 3: bytes -> text.
         code = code.decode('ascii', 'replace')
      return code

   def _wrap_tunnel(self, sock):
      '''Return sock wrapped for SSL/TLS once the tunnel is established.'''
      try:
         import ssl
      # Other than ESXi, there are many other occasions where people may use
      # esximage module, so just in case import ssl fails.
      except ImportError:
         if sys.version_info[0] >= 3:
            # Python 3 has no socket.ssl() / FakeSocket() to fall back on.
            raise
         sslsock = socket.ssl(sock, self.key_file, self.cert_file)
         return FakeSocket(sock, sslsock)
      # Prefer the context HTTPSConnection uses for direct connections, so
      # tunnelled connections get the same certificate checks and SNI.
      # ssl.wrap_socket() is only kept for interpreters without a context;
      # it no longer exists as of Python 3.12.
      context = getattr(self, '_context', None)
      if context is not None:
         return context.wrap_socket(sock, server_hostname=self.host)
      return ssl.wrap_socket(sock, self.key_file, self.cert_file)

   def connect(self):
      '''Connect to the remote host, through the proxy if one was given.

         Raises:
            * ValueError        - If the proxy is not in 'host:port' form.
            * ConnectProxyError - If the proxy refuses the CONNECT request.
      '''
      if not self.proxy:
         return HTTPSConnection.connect(self)

      # Let's talk to our proxy server:
      if ":" not in self.proxy:
         msg = "Please specify the proxy server %s in the right format: proxy-url:port." % self.proxy
         raise ValueError(msg)

      proxyhost, proxyport = self.proxy.rsplit(':', 1)
      sock = socket.create_connection((proxyhost.strip('[]'), int(proxyport)))
      try:
         # Then ask it to connect us to the secure host:
         target = self._tunnel_target()
         request = 'CONNECT %s HTTP/1.1\r\nHost: %s\r\n\r\n' % (target, target)
         sock.sendall(request.encode())
         status = self._read_proxy_status(sock)
         if status != '200':
            raise ConnectProxyError(
               "Error connecting to proxy: %s (proxy replied with status "
               "'%s')" % (self.proxy, status or 'none'))

         # If our proxy server gave us a green light, anything we
         # send/receive on this socket from here on out should be tunneled
         # to the remote server.  Re-use the socket from our connection to
         # the proxy, and set up our SSL connection now.
         self.sock = self._wrap_tunnel(sock)
      except Exception:
         # Do not leak the proxy connection when the tunnel cannot be set up.
         sock.close()
         raise

class ProxyHandler(_ProxyHandler, AbstractHTTPHandler):
   '''Sub-classes _ProxyHandler in order to handle https:// connections
      using the HTTPSProxyConnection class.
   '''
   def __init__(self, *args, **kwargs):
      '''Takes the same arguments as the standard ProxyHandler.'''
      AbstractHTTPHandler.__init__(self)
      _ProxyHandler.__init__(self, *args, **kwargs)
      # See the call to setattr in _ProxyHandler.__init__ to understand
      # why this is (unfortunately) necessary.
      if hasattr(self, 'https_open'):
         try:
            import ssl
         except ImportError:
            # No SSL support available: leave the handler installed by
            # _ProxyHandler in place.
            pass
         else:
            self.https_open = self._https_open

   https_request = AbstractHTTPHandler.do_request_

   def _https_open(self, req):
      '''Open the https request req, tunnelling through the configured https
         proxy if there is one, and return the response object.

         Raises:
            * URLError - If req names no host, or on a socket error while
                         sending the request or reading the response.
      '''
      # This is essentially duplicated from
      # AbstractHTTPHandler.do_open.  :-(
      isPython3 = sys.version_info[0] >= 3
      if isPython3:
         host = req.host
      else:
         host = req.get_host()
      if not host:
         raise URLError('no host given')

      # Here is our change to use HTTPSProxyConnection if we have an https
      # proxy.
      if 'https' in self.proxies:
         h = HTTPSProxyConnection(host, proxy=self.proxies['https'])
      else:
         h = HTTPSConnection(host)
      # End our changes.

      h.set_debuglevel(self._debuglevel)

      headers = dict(req.headers)
      headers.update(req.unredirected_hdrs)
      # The connection is used for this one request only.
      headers["Connection"] = "close"
      headers = dict(
         (name.title(), val) for name, val in headers.items())

      if isPython3:
         selector = req.selector
      else:
         selector = req.get_selector()
      try:
         h.request(req.get_method(), selector, req.data, headers)
         r = h.getresponse()
      except socket.error as err:
         # Do not leave a half-open connection behind.
         h.close()
         raise URLError(err)

      if isPython3:
         # Wrap the response object itself rather than its raw socket file
         # (r.fp), so that reads go through HTTPResponse.read() and get
         # chunked / Content-Length decoding.
         fp = r
      else:
         # socket._fileobject expects a recv() method on what it wraps.
         r.recv = r.read
         # close=True makes closing the wrapper close the response too.
         fp = socket._fileobject(r, close=True)

      # At this point r.msg still holds the response headers.
      resp = addinfourl(fp, r.msg, req.get_full_url())
      resp.code = r.status
      resp.msg = r.reason
      return resp

if __name__ == '__main__':
   # Small command-line client: fetch URL, optionally through the proxy
   # given with -p/--proxy, and write the response body to stdout.
   import optparse

   parser = optparse.OptionParser()
   parser.set_usage('%prog [options] URL')
   parser.add_option('-p', '--proxy', dest='proxy',
                     metavar='proto://host:port')
   options, args = parser.parse_args()
   if len(args) < 1:
      parser.error('URL required')
   if len(args) > 1:
      parser.error('Too many arguments')

   proxies = dict()
   if options.proxy:
      # Use the same proxy for every protocol.
      for proto in ('ftp', 'http', 'https'):
         proxies[proto] = options.proxy
   opener = build_opener(ProxyHandler(proxies))
   f = opener.open(args[0])
   try:
      body = f.read()
   finally:
      f.close()
   # On Python 3 the body is bytes; write it to the binary stdout stream
   # instead of printing its b'...' representation.
   out = getattr(sys.stdout, 'buffer', sys.stdout)
   out.write(body)
   out.flush()
