# Source code for geonode.proxy.views

#########################################################################
#
# Copyright (C) 2016 OSGeo
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
#########################################################################
import io
import os
import re
import gzip
import logging
import traceback
import zipstream

from hyperlink import URL
from urllib.parse import urlparse, urlsplit, urljoin

from django.conf import settings
from django.template import loader
from django.http import HttpResponse, StreamingHttpResponse
from django.views.generic import View
from distutils.version import StrictVersion
from django.http.request import validate_host
from django.utils.translation import ugettext as _
from django.views.decorators.csrf import requires_csrf_token

from geonode.layers.models import Dataset
from geonode.upload.models import Upload
from geonode.base.models import ResourceBase
from geonode.storage.manager import storage_manager
from geonode.utils import (
    resolve_object,
    check_ogc_backend,
    get_headers,
    http_client,
    json_response,
    extract_ip_or_domain,
)
from geonode.base.enumerations import LINK_TYPES as _LT

from geonode import geoserver  # noqa
from geonode.base import register_event
from geonode.base.auth import get_auth_user, get_token_from_auth_header

# Size (in bytes) of the chunks read from storage while streaming downloads.
BUFFER_CHUNK_SIZE = 64 * 1024

# Fallback timeout handed to the HTTP client when neither the caller nor the
# OGC server settings provide one.
TIMEOUT = 30

logger = logging.getLogger(__name__)

# Matches a query string of the exact form
# "version=<d.d.d>&request=GetCapabilities&service=<3 chars>", case-insensitively.
# The flag is passed to re.compile() instead of scattering inline "(?i)" groups
# through the pattern: global inline flags that are not at the very start of
# the expression are deprecated since Python 3.6 and raise re.error on 3.11+.
ows_regexp = re.compile(
    r"^(version)=(\d\.\d\.\d)&request=(GetCapabilities)&service=(\w\w\w)$",
    re.IGNORECASE,
)
@requires_csrf_token
def proxy(
    request,
    url=None,
    response_callback=None,
    sec_chk_hosts=True,
    sec_chk_rules=True,
    timeout=None,
    allowed_hosts=None,
    headers=None,
    access_token=None,
    **kwargs,
):
    """Forward the incoming request to another URL and relay the answer.

    :param request: the incoming Django request.
    :param url: target URL; when empty, the ``url`` GET parameter is used.
    :param response_callback: optional callable. When given (and a response was
        obtained) it is called with ``**kwargs`` extended with ``response``,
        ``content``, ``status``, ``response_headers`` and ``content_type``, and
        its return value is returned as-is.
    :param sec_chk_hosts: when true and ``settings.DEBUG`` is false, the target
        host must validate against the allowed-hosts list built below.
    :param sec_chk_rules: placeholder, rule based checks are not implemented.
    :param timeout: timeout passed to the HTTP client; defaults to the
        ``TIMEOUT`` attribute of ``ogc_server_settings`` or the module default.
    :param allowed_hosts: forwarded to ``get_headers``; ``None`` means an
        empty list (a fresh one per call, never a shared default).
    :param headers: pre-built headers; when empty they are obtained from
        ``get_headers`` together with the access token.
    :param access_token: token appended to GET urls and used to resolve the
        user passed to the HTTP client.
    :returns: an ``HttpResponse`` (or whatever ``response_callback`` returns).
    """
    # Request default timeout
    from geonode.geoserver.helpers import ogc_server_settings

    if allowed_hosts is None:
        allowed_hosts = []

    if not timeout:
        timeout = getattr(ogc_server_settings, "TIMEOUT", TIMEOUT)

    # Security rules and settings.
    # Work on a private copy: if the setting is a list, "+=" would extend the
    # settings object itself and hosts whitelisted for one request would stay
    # whitelisted for every following one.
    proxy_allowed_hosts = list(getattr(settings, "PROXY_ALLOWED_HOSTS", ()))

    # Sanity url checks
    if "url" not in request.GET and not url:
        return HttpResponse(
            "The proxy service requires a URL-encoded URL as a parameter.", status=400, content_type="text/plain"
        )

    raw_url = url or request.GET["url"]
    raw_url = urljoin(settings.SITEURL, raw_url) if raw_url.startswith("/") else raw_url
    url = urlsplit(raw_url)
    scheme = str(url.scheme)

    # White-Black Listing Hosts
    site_url = urlsplit(settings.SITEURL)
    if sec_chk_hosts and not settings.DEBUG:
        # Attach current SITEURL
        if site_url.hostname not in proxy_allowed_hosts:
            proxy_allowed_hosts.append(site_url.hostname)

        # Attach current hostname (compare the hostname string itself; the
        # previous code tested a 1-tuple for membership, which never matched).
        ogc_hostname = ogc_server_settings.hostname if ogc_server_settings else None
        if ogc_hostname and ogc_hostname not in proxy_allowed_hosts:
            proxy_allowed_hosts.append(ogc_hostname)

        # Check OWS regexp: a plain GetCapabilities request towards a known
        # OGC service type whitelists the requested host.
        ows_match = ows_regexp.match(url.query) if url.query else None
        if ows_match:
            ows_tokens = ows_match.groups()
            if (
                len(ows_tokens) == 4
                and "version" == ows_tokens[0]
                and StrictVersion("1.0.0") <= StrictVersion(ows_tokens[1]) <= StrictVersion("3.0.0")
                and ows_tokens[2].lower() == "getcapabilities"
                and ows_tokens[3].upper() in ("OWS", "WCS", "WFS", "WMS", "WPS", "CSW")
            ):
                if url.hostname not in proxy_allowed_hosts:
                    proxy_allowed_hosts.append(url.hostname)

        # Check Remote Services base_urls
        from geonode.services.models import Service

        for _s in Service.objects.all():
            _remote_host = urlsplit(_s.base_url).hostname
            if _remote_host and _remote_host not in proxy_allowed_hosts:
                proxy_allowed_hosts.append(_remote_host)

        if not validate_host(extract_ip_or_domain(raw_url), proxy_allowed_hosts):
            return HttpResponse(
                "DEBUG is set to False but the host of the path provided to the proxy service"
                " is not in the PROXY_ALLOWED_HOSTS setting.",
                status=403,
                content_type="text/plain",
            )

    # Security checks based on rules; allow only specific requests
    if sec_chk_rules:
        # TODO: Not yet implemented
        pass

    # Collecting headers and cookies
    if not headers:
        headers, access_token = get_headers(request, url, raw_url, allowed_hosts=allowed_hosts)
    if not access_token:
        auth_header = None
        if "Authorization" in headers:
            auth_header = headers["Authorization"]
        elif "HTTP_AUTHORIZATION" in request.META:
            auth_header = request.META.get("HTTP_AUTHORIZATION", request.META.get("HTTP_AUTHORIZATION2"))
        if auth_header:
            access_token = get_token_from_auth_header(auth_header, create_if_not_exists=True)
    user = get_auth_user(access_token)

    # Align the scheme with SITEURL when the target is this very site.
    # (A former "parsed._replace(path=...)" call discarded its result and has
    # been dropped together with the "locator" value it was the only user of.)
    parsed = urlparse(raw_url)
    if parsed.netloc == site_url.netloc and scheme != site_url.scheme:
        parsed = parsed._replace(scheme=site_url.scheme)

    _url = parsed.geturl()

    # Some clients / JS libraries generate URLs with relative URL paths, e.g.
    # "http://host/path/path/../file.css", which the requests library cannot
    # currently handle (https://github.com/kennethreitz/requests/issues/2982).
    # We parse and normalise such URLs into absolute paths before attempting
    # to proxy the request.
    _url = URL.from_text(_url).normalize().to_text()

    # Inject access_token if necessary
    if request.method == "GET" and access_token and "access_token" not in _url:
        query_separator = "&" if "?" in _url else "?"
        _url = f"{_url}{query_separator}access_token={access_token}"

    # Keep the payload as bytes: decoding it as UTF-8 only to re-encode it
    # afterwards made every non UTF-8 (binary) body fail with UnicodeDecodeError.
    _data = request.body

    # Avoid translating local geoserver calls into external ones
    if check_ogc_backend(geoserver.BACKEND_PACKAGE):
        _public_geoserver = f"{settings.SITEURL}geoserver"
        _internal_geoserver = ogc_server_settings.LOCATION.rstrip("/")
        _url = _url.replace(_public_geoserver, _internal_geoserver)
        _data = _data.replace(_public_geoserver.encode("utf-8"), _internal_geoserver.encode("utf-8"))

    response, content = http_client.request(
        _url, method=request.method, data=_data, headers=headers, timeout=timeout, user=user
    )
    if response is None:
        return HttpResponse(content=content, reason=content, status=500)

    content = response.content or response.reason
    status = response.status_code
    response_headers = response.headers
    content_type = response.headers.get("Content-Type")

    if status >= 400:
        _response = HttpResponse(content=content, reason=content, status=status, content_type=content_type)
        return fetch_response_headers(_response, response_headers)

    # decompress GZipped responses if not enabled
    # NOTE(review): this tests the Content-Type, not the Content-Encoding, so it
    # presumably almost never triggers. Left untouched because the HTTP client
    # may already decode gzip bodies itself - confirm before changing.
    if content and content_type and content_type == "gzip":
        with io.BytesIO(content) as buf, gzip.GzipFile(fileobj=buf) as f:
            content = f.read()

    # Textual payloads are handed to Django as str; anything that cannot be
    # decoded is passed through unchanged as bytes.
    PLAIN_CONTENT_TYPES = ["text", "plain", "html", "json", "xml", "gml"]
    if (
        content_type
        and not isinstance(content, str)
        and any(_ct in content_type for _ct in PLAIN_CONTENT_TYPES)
    ):
        try:
            content = content.decode()
        except (UnicodeDecodeError, AttributeError):
            logger.debug("Proxy: could not decode '%s' content, relaying it unchanged", content_type)

    if response and response_callback:
        kwargs.update(
            {
                "response": response,
                "content": content,
                "status": status,
                "response_headers": response_headers,
                "content_type": content_type,
            }
        )
        return response_callback(**kwargs)

    # If we get a redirect, let's add a useful message.
    if status and status in (301, 302, 303, 307):
        # Read the header through the same mapping used for Content-Type above.
        location = response_headers.get("Location")
        _response = HttpResponse(
            (
                f"This proxy does not support redirects. The server in '{url}' "
                f"asked for a redirect to '{location}'"
            ),
            status=status,
            content_type=content_type,
        )
        if location is not None:
            _response["Location"] = location
        return fetch_response_headers(_response, response_headers)

    def _get_message(text):
        """Extract the text following "<b>Message</b>" or return the input."""
        _s = text
        if isinstance(text, bytes):
            _s = text.decode("utf-8", "replace")
        try:
            found = re.search("<b>Message</b>(.+?)</p>", _s).group(1).strip()
        except Exception:
            found = _s
        return found

    _response = HttpResponse(
        content=content,
        reason=_get_message(content) if status not in (200, 201) else None,
        status=status,
        content_type=content_type,
    )
    return fetch_response_headers(_response, response_headers)
def download(request, resourceid, sender=Dataset):
    """Stream all the files of a resource to the client as a single ZIP.

    :param request: the incoming Django request.
    :param resourceid: primary key used to resolve the resource.
    :param sender: model class handed to ``resolve_object``.
    :returns: a ``StreamingHttpResponse`` with the archive; a 404 page when a
        file is missing, no file is attached or the lookup raises
        ``NotImplementedError`` / ``Upload.DoesNotExist``; a 403 page when the
        resolved object is not a ``ResourceBase``.
    """
    _not_authorized = _("You are not authorized to download this resource.")
    _not_permitted = _("You are not permitted to save or edit this resource.")
    _no_files_found = _("No files have been found for this resource. Please, contact a system administrator.")

    def _no_files_response():
        # Single place building the "No files found." page (status 404).
        return HttpResponse(
            loader.render_to_string(
                "401.html",
                context={"error_title": _("No files found."), "error_message": _no_files_found},
                request=request,
            ),
            status=404,
        )

    def _close(handle):
        # Storage handles are presumably file-like; close them when they can be.
        close = getattr(handle, "close", None)
        if callable(close):
            close()

    def _iterable(source_iter):
        # Iterable: Needed when the file_info has its data as a stream.
        # The handle is released as soon as the stream is exhausted or dropped.
        try:
            while True:
                buf = source_iter.read(BUFFER_CHUNK_SIZE)
                if not buf:
                    break
                yield buf
        finally:
            _close(source_iter)

    instance = resolve_object(
        request, sender, {"pk": resourceid}, permission="base.download_resourcebase", permission_msg=_not_permitted
    )

    if not isinstance(instance, ResourceBase):
        return HttpResponse(
            loader.render_to_string(
                "401.html", context={"error_title": _("Not Authorized"), "error_message": _not_authorized}, request=request
            ),
            status=403,
        )

    file_list = []  # Store file info to be returned

    def _release_opened():
        # Close every handle opened so far; used on all the early exits.
        for _info in file_list:
            _close(_info["data_iter"])

    try:
        files = instance.resourcebase_ptr.files or []
        # Open all the files related to the resource
        for file_path in files:
            if not storage_manager.exists(file_path):
                _release_opened()
                return _no_files_response()
            file_list.append(
                {
                    "name": os.path.basename(file_path),
                    "data_iter": storage_manager.open(file_path),
                }
            )

        # Check we can access the original files
        if not file_list:
            return _no_files_response()

        # ZIP everything and return
        target_file_name = "".join([instance.name, ".zip"])
        target_zip = zipstream.ZipFile(mode="w", compression=zipstream.ZIP_DEFLATED, allowZip64=True)

        # Add files to zip
        for file_info in file_list:
            target_zip.write_iter(arcname=file_info["name"], iterable=_iterable(file_info["data_iter"]))

        register_event(request, "download", instance)

        # Streaming content response
        response = StreamingHttpResponse(target_zip, content_type="application/zip")
        response["Content-Disposition"] = f'attachment; filename="{target_file_name}"'
        return response
    except (NotImplementedError, Upload.DoesNotExist):
        _release_opened()
        logger.debug(traceback.format_exc())
        return _no_files_response()
class OWSListView(View):
    """JSON listing of the OGC endpoints (and site url) exposed by this site."""

    def get(self, request):
        """Return ``{"success": True, "data": [...]}`` as a JSON response.

        ``data`` holds one ``{"url": ..., "type": ...}`` entry for the WMS, WCS
        and WFS capabilities urls, one per configured catalogue and a final
        one for ``settings.SITEURL``. Each url gets an ``access_token``
        parameter appended when ``get_headers`` yields a token for it.
        """
        from geonode.geoserver import ows

        def _with_token(raw_url):
            # Append the access token (if any) using the right separator.
            split_url = urlsplit(raw_url)
            _headers, token = get_headers(request, split_url, raw_url)
            if not token:
                return raw_url
            joiner = "&" if split_url.query else "?"
            return f"{raw_url}{joiner}access_token={token}"

        endpoints = []

        # OGC services published by the backend, in WMS / WCS / WFS order
        for capabilities_url, link_type in (
            (ows._wms_get_capabilities, "OGC:WMS"),
            (ows._wcs_get_capabilities, "OGC:WCS"),
            (ows._wfs_get_capabilities, "OGC:WFS"),
        ):
            endpoints.append({"url": _with_token(capabilities_url()), "type": link_type})

        # catalogue from configuration (CSW)
        for catalogue_conf in settings.CATALOGUE.values():
            endpoints.append({"url": _with_token(catalogue_conf["URL"]), "type": "OGC:CSW"})

        # main site url
        endpoints.append({"url": settings.SITEURL, "type": "WWW:LINK"})

        return json_response({"success": True, "data": endpoints})
[docs] _hoppish = { "connection", "keep-alive", "proxy-authenticate", "proxy-authorization", "te", "trailers", "transfer-encoding", "upgrade", "content-length", "content-encoding", }.__contains__
def is_hop_by_hop(header_name):
    """Return true if 'header_name' is an HTTP/1.1 "Hop-by-Hop" header"""
    # The lookup table only holds lower-case names, so normalise first.
    normalized_name = header_name.lower()
    return _hoppish(normalized_name)
def fetch_response_headers(response, response_headers):
    """Copy the headers of a proxied response onto ``response``.

    Headers flagged by ``is_hop_by_hop`` are skipped, and a header is never
    overwritten when ``response`` already carries one with the same
    (case-insensitive) name. ``response`` is modified in place and returned.
    """
    if not response_headers:
        return response

    def _is_missing(existing_names, wanted):
        # True when no existing header name equals ``wanted`` ignoring case.
        return all(_name.lower() != wanted for _name in existing_names)

    for header in response_headers:
        if is_hop_by_hop(header):
            continue
        lowered = header.lower()
        if hasattr(response, "headers") and _is_missing(response.headers.keys(), lowered):
            response.headers[header] = response_headers.get(header)
        elif hasattr(response, "_headers") and _is_missing(response._headers.keys(), lowered):
            # NOTE(review): presumably the private storage of older Django
            # responses, holding (name, value) pairs - confirm the expected key.
            response._headers[header] = (header, response_headers.get(header))
    return response