diff --git a/README.md b/README.md index 04e7d39..5eefccb 100644 --- a/README.md +++ b/README.md @@ -491,6 +491,18 @@ demo = TestAppCKAN(test_app, apikey='my-test-key') groups = demo.action.group_list(id='data-explorer') ``` +## Timeouts + +All requests made to CKAN, whether via the CLI or the Python module, have a timeout defined. +It currently defaults to 5 seconds. You can set a custom timeout (in whole seconds) using the following +environment variables: + +* `CKANAPI_REQUEST_TIMEOUT`: the connect timeout (how long to wait while connecting to the remote server) +* `CKANAPI_REQUEST_READ_TIMEOUT`: the read timeout (how long to wait for the server to send a response) + +If the read timeout is not defined, the connect timeout value is used for it as well. Please refer to +the [requests library documentation](https://requests.readthedocs.io/en/latest/user/advanced/#timeouts) for more details. + ## Tests diff --git a/ckanapi/cli/load.py b/ckanapi/cli/load.py index 450ab91..6c1a9b9 100644 --- a/ckanapi/cli/load.py +++ b/ckanapi/cli/load.py @@ -10,6 +10,7 @@ import re from urllib.parse import urlparse +from ckanapi.common import REQUEST_TIMEOUT from ckanapi.errors import (NotFound, NotAuthorized, ValidationError, SearchIndexError) from ckanapi.cli import workers @@ -284,7 +285,7 @@ def _upload_resources(ckan,obj,arguments): if resource.get('url_type') != 'upload': continue - f = requests.get(resource['url'],stream=True) + f = requests.get(resource['url'], stream=True, timeout=REQUEST_TIMEOUT) name = resource['url'].rsplit('/',1)[-1] ckan.call_action('resource_patch', {'id':resource['id']}, @@ -301,9 +302,9 @@ def _upload_logo(ckan,obj_orig): obj['clear_upload'] = True obj['image_upload'] = obj['image_url'] else: - f = requests.get(obj['image_display_url'],stream=True) + f = requests.get(obj['image_display_url'], stream=True, timeout=REQUEST_TIMEOUT) name,ext = obj['image_url'].rsplit('.',1) #reformulate image_url for new site - new_name = re.sub('[0-9\.-]','',name) + new_name = 
re.sub('[0-9.-]','',name) new_url = new_name+'.'+ext obj['image_upload'] = (new_url, f.raw) ckan.action.group_update(**obj) diff --git a/ckanapi/common.py b/ckanapi/common.py index 87d2bd7..fbe8e96 100644 --- a/ckanapi/common.py +++ b/ckanapi/common.py @@ -3,11 +3,18 @@ """ import json +import os from ckanapi.errors import (CKANAPIError, NotAuthorized, NotFound, ValidationError, SearchQueryError, SearchError, SearchIndexError, ServerIncompatibleError) + +request_connection_timeout= int(os.getenv("CKANAPI_REQUEST_TIMEOUT", default=5)) +request_read_timeout= int(os.getenv("CKANAPI_REQUEST_READ_TIMEOUT", default=request_connection_timeout)) +REQUEST_TIMEOUT = (request_connection_timeout, request_read_timeout) + + class ActionShortcut(object): """ ActionShortcut(foo).bar(baz=2) <=> foo.call_action('bar', {'baz':2}) diff --git a/ckanapi/datapackage.py b/ckanapi/datapackage.py index c9bd1a7..f3e1a00 100644 --- a/ckanapi/datapackage.py +++ b/ckanapi/datapackage.py @@ -4,6 +4,7 @@ import slugify +from ckanapi.common import REQUEST_TIMEOUT from ckanapi.cli.utils import pretty_json from ckanapi.errors import CKANAPIError, NotFound @@ -24,7 +25,7 @@ def create_resource(resource, filename, datapackage_dir, stderr, apikey): headers['Authorization'] = apikey try: - r = requests.get(resource['url'], headers=headers, stream=True) + r = requests.get(resource['url'], headers=headers, stream=True, timeout=REQUEST_TIMEOUT) with open(os.path.join(datapackage_dir, path), 'wb') as f: for chunk in r.iter_content(chunk_size=DL_CHUNK_SIZE): if chunk: # filter out keep-alive new chunks diff --git a/ckanapi/remoteckan.py b/ckanapi/remoteckan.py index 75c4c62..fff111f 100644 --- a/ckanapi/remoteckan.py +++ b/ckanapi/remoteckan.py @@ -3,7 +3,7 @@ from ckanapi.errors import CKANAPIError from ckanapi.common import (ActionShortcut, prepare_action, - reverse_apicontroller_action) + reverse_apicontroller_action, REQUEST_TIMEOUT) from ckanapi.version import __version__ import os @@ -84,6 +84,7 @@ def 
call_action(self, action, data_dict=None, context=None, apikey=None, headers['User-Agent'] = self.user_agent url = self.address.rstrip('/') + '/' + url requests_kwargs = requests_kwargs or {} + requests_kwargs.setdefault("timeout", REQUEST_TIMEOUT) if not self.session: self.session = requests.Session() if self.get_only: diff --git a/ckanapi/tests/test_remote.py b/ckanapi/tests/test_remote.py index 59604bd..54fbb6c 100644 --- a/ckanapi/tests/test_remote.py +++ b/ckanapi/tests/test_remote.py @@ -4,9 +4,12 @@ import atexit import socket import requests +import json from ckanapi import RemoteCKAN, NotFound +from ckanapi.common import REQUEST_TIMEOUT import unittest +from unittest import mock from subprocess import DEVNULL from urllib.request import urlopen, URLError from io import StringIO @@ -105,8 +108,32 @@ def test_resource_upload_content_type(self): files={'upload': StringIO(NUMBER_THING_CSV)}) self.assertEqual(res.split(';')[0], "multipart/form-data") + def test_default_timeout(self): + mock_response = mock.MagicMock() + mock_response.status_code = 200 + mock_response.text = json.dumps({"success": True, "result": []}) + + with mock.patch('requests.Session.post', return_value=mock_response) as mock_post: + with RemoteCKAN(TEST_CKAN) as ckan: + ckan.action.organization_list() + _, kwargs = mock_post.call_args + self.assertEqual(kwargs.get('timeout'), REQUEST_TIMEOUT) + + def test_custom_timeout(self): + mock_response = mock.MagicMock() + mock_response.status_code = 200 + mock_response.text = json.dumps({"success": True, "result": []}) + + # We patch at the module level because the env var is read at import time and + # can't be patched + with mock.patch("ckanapi.remoteckan.REQUEST_TIMEOUT", (2, 30)): + with mock.patch('requests.Session.post', return_value=mock_response) as mock_post: + with RemoteCKAN(TEST_CKAN) as ckan: + ckan.action.organization_list() + _, kwargs = mock_post.call_args + self.assertEqual(kwargs.get('timeout'), (2, 30)) + @classmethod def 
tearDownClass(cls): cls._mock_ckan.kill() cls._mock_ckan.wait() -