geonode.harvesting.harvesters.geonodeharvester

Harvesters for remote GeoNode servers.

Attributes

logger

Classes

GeoNodeDatasetType

Generic enumeration.

RemoteDatasetType

Generic enumeration.

GeoNodeResourceType

Generic enumeration.

GeoNodeResourceTypeCurrent

Generic enumeration.

GeonodeCurrentHarvester

A harvester for modern (v > 3.2) GeoNode versions.

GeonodeLegacyHarvester

A harvester for older (v <= 3.2) GeoNode versions.

GeonodeUnifiedHarvesterWorker

A harvester worker that is able to retrieve details from most GeoNode deployments.

Functions

get_contact_descriptor(contact)

get_identification_descriptor(csw_identification, ...)

_get_native_format(→ Optional[str])

get_spatial_extent_4326(...)

get_spatial_extent_native(api_record)

get_temporal_extent(...)

_get_optional_attribute_value(→ Optional[str])

_get_extra_config_schema(→ Dict)

_from_django_record(target_class, record)

_check_availability(→ bool)

Module Contents

geonode.harvesting.harvesters.geonodeharvester.logger[source]
class geonode.harvesting.harvesters.geonodeharvester.GeoNodeDatasetType[source]

Bases: enum.Enum

Generic enumeration.

Derive from this class to define new enumerations.

VECTOR = 'vector'[source]
RASTER = 'raster'[source]
class geonode.harvesting.harvesters.geonodeharvester.RemoteDatasetType[source]

Bases: enum.Enum

Generic enumeration.

Derive from this class to define new enumerations.

VECTOR = 'shapefile'[source]
RASTER = 'geotiff'[source]
class geonode.harvesting.harvesters.geonodeharvester.GeoNodeResourceType[source]

Bases: enum.Enum

Generic enumeration.

Derive from this class to define new enumerations.

DOCUMENT = 'documents'[source]
DATASET = 'layers'[source]
MAP = 'maps'[source]
class geonode.harvesting.harvesters.geonodeharvester.GeoNodeResourceTypeCurrent[source]

Bases: enum.Enum

Generic enumeration.

Derive from this class to define new enumerations.

DOCUMENT = 'document'[source]
DATASET = 'dataset'[source]
class geonode.harvesting.harvesters.geonodeharvester.GeonodeCurrentHarvester(*args, harvest_documents: bool | None = True, harvest_datasets: bool | None = True, copy_datasets: bool | None = False, copy_documents: bool | None = False, resource_title_filter: str | None = None, start_date_filter: str | None = None, end_date_filter: str | None = None, keywords_filter: List[str] | None = None, categories_filter: List[str] | None = None, **kwargs)[source]

Bases: geonode.harvesting.harvesters.base.BaseHarvesterWorker

A harvester for modern (v > 3.2) GeoNode versions.

GeoNode versions above 3.2 introduced the concept of datasets to replace the older layers concept. The API also has some significant differences.

harvest_documents: bool[source]
harvest_datasets: bool[source]
harvest_maps: bool = False[source]
copy_documents: bool[source]
copy_datasets: bool[source]
resource_title_filter: str | None[source]
start_date_filter: str | None[source]
end_date_filter: str | None[source]
keywords_filter: List[str] | None[source]
categories_filter: List[str] | None[source]
http_session: requests.Session[source]
page_size: int = 10[source]
remote_url[source]
property base_api_url[source]
property allows_copying_resources: bool[source]
classmethod from_django_record(record: geonode.harvesting.models.Harvester)[source]
classmethod get_extra_config_schema() Dict[source]
get_num_available_resources() int[source]
list_resources(offset: int | None = 0) List[geonode.harvesting.harvesters.base.BriefRemoteResource][source]
check_availability(timeout_seconds: int | None = 5) bool[source]
get_geonode_resource_type(remote_resource_type: str) Type[geonode.layers.models.Dataset | geonode.documents.models.Document][source]
get_resource(harvestable_resource: geonode.harvesting.models.HarvestableResource) geonode.harvesting.harvesters.base.HarvestedResourceInfo | None[source]
should_copy_resource(harvestable_resource: geonode.harvesting.models.HarvestableResource) bool[source]
get_geonode_resource_defaults(harvested_info: geonode.harvesting.harvesters.base.HarvestedResourceInfo, harvestable_resource: geonode.harvesting.models.HarvestableResource) Dict[source]
_get_contact_descriptor(role, contact_details: Dict)[source]
_get_resource_descriptor(raw_resource: Dict, remote_resource_type: str) geonode.harvesting.resourcedescriptor.RecordDescription[source]
_get_resource_list_params(offset: int | None = 0) Dict[source]
class geonode.harvesting.harvesters.geonodeharvester.GeonodeLegacyHarvester(*args, harvest_documents: bool | None = True, harvest_datasets: bool | None = True, copy_datasets: bool | None = False, copy_documents: bool | None = False, resource_title_filter: str | None = None, start_date_filter: str | None = None, end_date_filter: str | None = None, keywords_filter: list | None = None, categories_filter: list | None = None, **kwargs)[source]

Bases: geonode.harvesting.harvesters.base.BaseHarvesterWorker

A harvester for older (v <= 3.2) GeoNode versions.

harvest_documents: bool[source]
harvest_datasets: bool[source]
harvest_maps: bool = False[source]
copy_documents: bool[source]
copy_datasets: bool[source]
resource_title_filter: str | None[source]
http_session: requests.Session[source]
page_size: int = 10[source]
remote_url[source]
start_date_filter[source]
end_date_filter[source]
keywords_filter[source]
categories_filter[source]
property base_api_url[source]
property allows_copying_resources: bool[source]
classmethod from_django_record(record: geonode.harvesting.models.Harvester)[source]
classmethod get_extra_config_schema() Dict[source]
get_num_available_resources() int[source]
list_resources(offset: int | None = 0) List[geonode.harvesting.harvesters.base.BriefRemoteResource][source]
check_availability(timeout_seconds: int | None = 5) bool[source]

Check whether the remote GeoNode is online.

get_geonode_resource_type(remote_resource_type: str) Type[geonode.layers.models.Dataset | geonode.documents.models.Document | geonode.maps.models.Map][source]

Return resource type class from resource type string.

get_resource(harvestable_resource: geonode.harvesting.models.HarvestableResource) geonode.harvesting.harvesters.base.HarvestedResourceInfo | None[source]
should_copy_resource(harvestable_resource: geonode.harvesting.models.HarvestableResource) bool[source]
get_geonode_resource_defaults(harvested_info: geonode.harvesting.harvesters.base.HarvestedResourceInfo, harvestable_resource: geonode.harvesting.models.HarvestableResource) Dict[source]
_get_num_available_resources_by_type() Dict[GeoNodeResourceType, int][source]
_list_document_resources(offset: int) List[geonode.harvesting.harvesters.base.BriefRemoteResource][source]
_list_dataset_resources(offset: int) List[geonode.harvesting.harvesters.base.BriefRemoteResource][source]
_list_map_resources(offset: int) List[geonode.harvesting.harvesters.base.BriefRemoteResource][source]
_list_resources_by_type(resource_type: GeoNodeResourceType, offset: int) List[geonode.harvesting.harvesters.base.BriefRemoteResource][source]
_extract_unique_identifier(raw_remote_resource: Dict) str[source]
_get_resource_details(api_record: Dict, harvestable_resource: geonode.harvesting.models.HarvestableResource) geonode.harvesting.resourcedescriptor.RecordDescription | None[source]

Produce a record description from the response provided by the remote GeoNode.

_get_resource_list_params(offset: int | None = 0) Dict[source]
_get_total_records(resource_type: GeoNodeResourceType) int[source]
_get_resource_descriptor(csw_record: lxml.etree.Element, api_record: Dict, harvestable_resource: geonode.harvesting.models.HarvestableResource) geonode.harvesting.resourcedescriptor.RecordDescription[source]
_get_dataset_additional_parameters(descriptor: geonode.harvesting.resourcedescriptor.RecordDescription, api_record: Dict) Dict[source]
_get_document_additional_parameters(descriptor: geonode.harvesting.resourcedescriptor.RecordDescription, api_record: Dict) Dict[source]
_get_map_additional_parameters(descriptor: geonode.harvesting.resourcedescriptor.RecordDescription, api_record: Dict) Dict[source]
get_distribution_info(csw_distribution: lxml.etree.Element, api_record: Dict, harvestable_resource: geonode.harvesting.models.HarvestableResource, identification_descriptor: geonode.harvesting.resourcedescriptor.RecordIdentification, crs: str) geonode.harvesting.resourcedescriptor.RecordDistribution[source]
_retrieve_thumbnail_url(api_record: Dict, harvestable_resource: geonode.harvesting.models.HarvestableResource) str[source]
class geonode.harvesting.harvesters.geonodeharvester.GeonodeUnifiedHarvesterWorker(*args, harvest_documents: bool | None = True, harvest_datasets: bool | None = True, copy_datasets: bool | None = False, copy_documents: bool | None = False, resource_title_filter: str | None = None, start_date_filter: str | None = None, end_date_filter: str | None = None, keywords_filter: List[str] | None = None, categories_filter: List[str] | None = None, **kwargs)[source]

Bases: geonode.harvesting.harvesters.base.BaseHarvesterWorker

A harvester worker that is able to retrieve details from most GeoNode deployments.

This harvester type relies on the GeonodeCurrentHarvester and GeonodeLegacyHarvester for most operations. It simply determines which concrete harvester to use based on the remote’s response to the availability check and then uses it.

_concrete_harvester_worker: GeonodeCurrentHarvester | GeonodeLegacyHarvester | None[source]
remote_url[source]
http_session[source]
harvest_documents[source]
harvest_datasets[source]
copy_datasets[source]
copy_documents[source]
resource_title_filter[source]
start_date_filter[source]
end_date_filter[source]
keywords_filter[source]
categories_filter[source]
property concrete_worker: GeonodeCurrentHarvester | GeonodeLegacyHarvester[source]
property allows_copying_resources: bool[source]
classmethod from_django_record(record: geonode.harvesting.models.Harvester)[source]
classmethod get_extra_config_schema() Dict[source]
get_num_available_resources() int[source]
list_resources(offset: int | None = 0) List[geonode.harvesting.harvesters.base.BriefRemoteResource][source]
check_availability(timeout_seconds: int | None = 5) bool[source]
get_geonode_resource_type(remote_resource_type: str) Type[geonode.layers.models.Dataset | geonode.documents.models.Document][source]
get_resource(harvestable_resource: geonode.harvesting.models.HarvestableResource) geonode.harvesting.harvesters.base.HarvestedResourceInfo | None[source]
should_copy_resource(harvestable_resource: geonode.harvesting.models.HarvestableResource) bool[source]
get_geonode_resource_defaults(harvested_info: geonode.harvesting.harvesters.base.HarvestedResourceInfo, harvestable_resource: geonode.harvesting.models.HarvestableResource) Dict[source]
_get_concrete_worker() GeonodeCurrentHarvester | GeonodeLegacyHarvester[source]
geonode.harvesting.harvesters.geonodeharvester.get_contact_descriptor(contact: lxml.etree.Element)[source]
geonode.harvesting.harvesters.geonodeharvester.get_identification_descriptor(csw_identification: lxml.etree.Element, api_record: Dict)[source]
geonode.harvesting.harvesters.geonodeharvester._get_native_format(csw_identification: lxml.etree.Element, api_record: Dict) str | None[source]
geonode.harvesting.harvesters.geonodeharvester.get_spatial_extent_4326(identification_el: lxml.etree.Element) django.contrib.gis.geos.Polygon | None[source]
geonode.harvesting.harvesters.geonodeharvester.get_spatial_extent_native(api_record: Dict)[source]
geonode.harvesting.harvesters.geonodeharvester.get_temporal_extent(identification_el: lxml.etree.Element) Tuple[datetime.datetime, datetime.datetime] | None[source]
geonode.harvesting.harvesters.geonodeharvester._get_optional_attribute_value(element: lxml.etree.Element, xpath: str) str | None[source]
geonode.harvesting.harvesters.geonodeharvester._get_extra_config_schema() Dict[source]
geonode.harvesting.harvesters.geonodeharvester._from_django_record(target_class: Type, record: geonode.harvesting.models.Harvester)[source]
geonode.harvesting.harvesters.geonodeharvester._check_availability(http_session, url: str, payload_key_to_check: str, timeout_seconds: int | None = 5) bool[source]