temporal constraints #13

Open · wants to merge 24 commits into master

Commits (24)
77dd923
enabled temporal search
cehbrecht Dec 5, 2014
c0ef521
fixed comment
cehbrecht Apr 14, 2015
894b57b
Merge pull request #1 from cehbrecht/master
agstephens May 27, 2015
f30da29
Added a test for temporal search parameters on Datasets.
Jun 15, 2015
b86abe0
Updated documentation to include temporal search.
Jun 16, 2015
8dd99f5
Cleaned up temporal test.
Jun 16, 2015
9cd6c0f
Updated tests based on modern results and CEDA index node as first co…
Jun 16, 2015
5ffd072
Added temporal search and improved exception response when bad search…
Jun 16, 2015
e298fe1
Updated version and setup info.
Jun 16, 2015
59edd59
Incremented version to 0.1.4
Jun 16, 2015
ee489ce
Switched off default logging to DEBUG and updated examples in docs.
Jun 16, 2015
56fd9c6
Upgraded version.
Jun 16, 2015
a48767f
enable search facet search_type to search for Files and Aggregations
soay Oct 27, 2015
4463813
Merge remote-tracking branch 'upstream/master'
cehbrecht Dec 8, 2015
4e454bd
Merge pull request #2 from soay/master
agstephens Dec 16, 2015
1eb146c
MBH: updating .gitignore
mattben Jan 4, 2016
3791f3e
Updated version and documentation for logon module (python >= 2.7.9 i…
agstephens Jan 7, 2016
7596646
Merge remote-tracking branch 'upstream/master'
cehbrecht Jan 18, 2016
537ceb7
Fixed error with SHARD_REXP not matching all cases.
agstephens May 13, 2016
532e159
Upgraded version.
agstephens May 16, 2016
6010504
Updated version to 0.1.3.
agstephens May 16, 2016
d8be2a7
Upgraded version to 0.1.6.
agstephens May 16, 2016
7724059
Fixed unit tests that were failing due to changes in node configurati…
agstephens May 17, 2016
b0ccfb0
Merge remote-tracking branch 'upstream/master'
cehbrecht May 24, 2016
3 changes: 3 additions & 0 deletions .gitignore
@@ -26,3 +26,6 @@ generated/
 /eclipse
 /backup
 ./do_jenkins.sh
+test/url_cache
+.esg/
+env/
14 changes: 9 additions & 5 deletions docs/concepts.rst
@@ -27,18 +27,22 @@ facets      SearchContext    Set in constructor
 fields      SearchContext    Set in constructor
 replica     SearchContext    Set in constructor
 type        SearchContext    Create contexts with the right type using :meth:`ResultSet.file_context`, etc.
-from        SearchContext    Not implemented yet. Placeholder name "from_timestamp"
-to          SearchContext    Not implemented yet. Placeholder name "to_timestamp"
+from        SearchContext    Set in constructor. Use "from_timestamp" in the context API.
+to          SearchContext    Set in constructor. Use "to_timestamp" in the context API.
 fields      n/a              Managed internally
 format      n/a              Managed internally
 id          n/a              Managed internally
 =========== ================ ===================================================================================================
 
+Temporal keywords
+'''''''''''''''''
-Temporal / Spatial keywords
-'''''''''''''''''''''''''''
+
+Temporal keywords are supported for Dataset search. The terms "from_timestamp" and "to_timestamp" should be used with values following the format "YYYY-MM-DDThh:mm:ssZ".
 
-Temporal and spatial keywords are not yet supported by :mod:`pyesgf.search` however the API does have placeholders for these keywords anticipating future implementation:
+Spatial keywords
+''''''''''''''''
+
+Spatial keywords are not yet supported by :mod:`pyesgf.search` however the API does have placeholders for these keywords anticipating future implementation:
 
 Facet keywords
 ''''''''''''''
20 changes: 20 additions & 0 deletions docs/examples.rst
@@ -52,6 +52,26 @@ Find download URLs for all files in a dataset
 http://esg-datanode.jpl.nasa.gov/thredds/fileServer/esg_dataroot/obs4MIPs/observations/atmos/tro3/mon/grid/NASA-JPL/TES/v20110608/tro3_TES_L3_tbd_200507-200912.nc
 http://esg-datanode.jpl.nasa.gov/thredds/fileServer/esg_dataroot/obs4MIPs/observations/atmos/tro3Stderr/mon/grid/NASA-JPL/TES/v20110608/tro3Stderr_TES_L3_tbd_200507-200912.nc
 
+Define a search for datasets that includes a temporal range:
+
+>>> conn = SearchConnection('http://esgf-index1.ceda.ac.uk/esg-search',
+                            distrib=False)
+>>> ctx = conn.new_context(project="CMIP5", model="HadGEM2-ES",
+                           time_frequency="mon", realm="atmos", ensemble="r1i1p1", latest=True,
+                           from_timestamp="2100-12-30T23:23:59Z", to_timestamp="2200-01-01T00:00:00Z")
+>>> ctx.hit_count
+3
+
+Or do the same thing by searching without temporal constraints and then applying the constraint:
+
+>>> ctx = conn.new_context(project="CMIP5", model="HadGEM2-ES",
+                           time_frequency="mon", realm="atmos", ensemble="r1i1p1", latest=True)
+>>> ctx.hit_count
+21
+>>> ctx = ctx.constrain(from_timestamp="2100-12-30T23:23:59Z", to_timestamp="2200-01-01T00:00:00Z")
+>>> ctx.hit_count
+3
+
Obtain MyProxy credentials to allow downloading files or using secured OPeNDAP

>>> from pyesgf.logon import LogonManager
2 changes: 2 additions & 0 deletions docs/logon.rst
@@ -4,5 +4,7 @@ ESGF Security API
 
 :mod:`pyesgf` provides a simplified interface to obtaining ESGF credentials.
 
+.. warning:: This interface only works with **Python versions 2.7.9 or greater** (due to an SSL update).
+
 .. automodule:: pyesgf.logon
    :members:
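
Since the warning above is the only functional note in this hunk, here is a minimal guard (a sketch, not part of the diff) showing how a script could enforce the Python >= 2.7.9 requirement before importing the logon module:

import sys

# pyesgf.logon relies on SSL behaviour introduced in Python 2.7.9,
# so fail fast on older interpreters.
if sys.version_info < (2, 7, 9):
    raise RuntimeError("pyesgf.logon requires Python >= 2.7.9 (SSL update)")

from pyesgf.logon import LogonManager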
2 changes: 1 addition & 1 deletion pyesgf/__init__.py
@@ -3,7 +3,7 @@
 
 """
 
-__version__ = '0.1.2'
+__version__ = '0.1.6'
 
 #!TODO: ResultFormatter class. process response json to specialise the result json. Default is None
 #!TODO: pipe results to new process. Command-line interface.
35 changes: 25 additions & 10 deletions pyesgf/search/connection.py
@@ -27,7 +27,9 @@
 import warnings
 import logging
 
+logging.basicConfig()
 log = logging.getLogger(__name__)
+log.setLevel(logging.INFO)
 
 from .context import DatasetSearchContext
 from .consts import RESPONSE_FORMAT, SHARD_REXP
@@ -126,7 +128,7 @@ def send_wget(self, query_dict, shards=None):
     def _send_query(self, endpoint, full_query):
         """
         Generally not to be called directly by the user but via SearchContext
         instances.
 
         :param full_query: dictionary of query string parameters to send.
         :return: the urllib2 response object from the query.
@@ -138,7 +140,16 @@ def _send_query(self, endpoint, full_query):
         query_url = '%s/%s?%s' % (self.url, endpoint, urlencode(full_query))
         log.debug('Query request is %s' % query_url)
 
-        response = urllib2.urlopen(query_url)
+        try:
+            response = urllib2.urlopen(query_url)
+        except urllib2.HTTPError, err:
+            log.warn("HTTP request received error code: %s" % err.code)
+            if err.code == 400:
+                errors = set(re.findall(r"Invalid HTTP query parameter=(\w+)", err.fp.read()))
+                content = "; ".join(errors)
+                raise Exception("Invalid query parameter(s): %s" % content)
+            else:
+                raise Exception("Error returned from URL: %s" % query_url)
 
         return response

@@ -155,7 +166,12 @@ def _build_query(self, query_dict, limit=None, offset=None, shards=None):
             else:
                 for port, suffix in self._available_shards[shard]:
                     # suffix should be omitted when querying
-                    shard_specs.append('%s:%s/solr' % (shard, port))
+                    if not port:
+                        port_string = ""
+                    else:
+                        port_string = ":%s" % port
+
+                    shard_specs.append('%s%s/solr' % (shard, port_string))
 
             shard_str = ','.join(shard_specs)
         else:
@@ -229,9 +245,8 @@ def get_shard_list(self):
 
     def new_context(self, context_class=None,
                     latest=None, facets=None, fields=None,
-                    #!TODO: add once implemented
-                    #from_timestamp=None, to_timestamp=None,
-                    replica=None, shards=None,
+                    from_timestamp=None, to_timestamp=None,
+                    replica=None, shards=None, search_type=None,
                     **constraints):
         """
         Returns a :class:`pyesgf.search.context.SearchContext` class for
@@ -246,10 +261,10 @@ def new_context(self, context_class=None,
 
         return context_class(self, constraints,
                              latest=latest, facets=facets, fields=fields,
-                             #!TODO: add once implemented
-                             #from_timestamp=from_timestamp,
-                             #to_timestamp=to_timestamp,
+                             from_timestamp=from_timestamp,
+                             to_timestamp=to_timestamp,
                              replica=replica, shards=shards,
+                             search_type=search_type,
                              )


@@ -266,7 +281,7 @@ def query_keyword_type(keyword):
 
     if keyword == 'query':
         return 'freetext'
-    elif keyword in ['start', 'end']:
+    elif keyword in ['start', 'end', 'from_timestamp', 'to_timestamp']:
         return 'temporal'
     elif keyword in ['lat', 'lon', 'bbox', 'location', 'radius', 'polygon']:
         return 'geospatial'
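
To make the routing above concrete, this small check (illustrative only; it exercises just the branches visible in this hunk) shows how query_keyword_type() now classifies the new keywords:

from pyesgf.search.connection import query_keyword_type

# 'query' is free-text; the two new timestamp keywords join 'start'/'end'
# as temporal constraints; bounding boxes remain geospatial.
assert query_keyword_type('query') == 'freetext'
assert query_keyword_type('from_timestamp') == 'temporal'
assert query_keyword_type('to_timestamp') == 'temporal'
assert query_keyword_type('bbox') == 'geospatial'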
3 changes: 2 additions & 1 deletion pyesgf/search/consts.py
@@ -10,4 +10,5 @@
 
 OPERATOR_NEQ = 'not_equal'
 
-SHARD_REXP = r'(?P<host>.*?):(?P<port>\d*)/solr(?P<suffix>.*)'
+SHARD_REXP = r'^(?P<prefix>https?://)?(?P<host>.+?):?(?P<port>\d+)?/(?P<suffix>.*)$'

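A quick sanity check (illustrative; the same cases are covered by test/test_shard_regex.py below) that the broadened pattern accepts a shard endpoint without an explicit port, which the old expression could not match:

import re

from pyesgf.search.consts import SHARD_REXP

# A port-less endpoint now matches, with the 'port' group simply None.
match = re.match(SHARD_REXP, 'esgf.a.c/solr')
assert match.groupdict() == {'prefix': None, 'host': 'esgf.a.c',
                             'port': None, 'suffix': 'solr'}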
30 changes: 12 additions & 18 deletions pyesgf/search/context.py
@@ -65,8 +65,10 @@ def __init__(self, connection, constraints, search_type=None,
             or only non-latest versions, or None to return both.
         :param shards: list of shards to restrict searches to. Should be from the list
             self.connection.get_shard_list()
-        :param from_timestamp: NotImplemented
-        :param to_timestamp: NotImplemented
+        :param from_timestamp: Date-time string to specify start of search range
+            (e.g. "2000-01-01T00:00:00Z").
+        :param to_timestamp: Date-time string to specify end of search range
+            (e.g. "2100-12-31T23:59:59Z").

"""

@@ -80,7 +82,7 @@ def __init__(self, connection, constraints, search_type=None,
         # Constraints
         self.freetext_constraint = None
         self.facet_constraints = MultiDict()
-        self.temporal_constraint = (None, None)
+        self.temporal_constraint = [from_timestamp, to_timestamp]
         self.geospatial_constraint = None
 
         self._update_constraints(constraints)
@@ -221,7 +223,10 @@ def _update_constraints(self, constraints):
             self._constrain_freetext(new_freetext)
 
         #!TODO: implement temporal and geospatial constraints
-        #self._constrain_temporal()
+        if 'from_timestamp' in constraints_split['temporal']:
+            self.temporal_constraint[0] = constraints_split['temporal']['from_timestamp']
+        if 'to_timestamp' in constraints_split['temporal']:
+            self.temporal_constraint[1] = constraints_split['temporal']['to_timestamp']
         #self._constrain_geospatial()
 
         # reset cached values
@@ -242,18 +247,6 @@ def _constrain_facets(self, facet_constraints):
     def _constrain_freetext(self, query):
         self.freetext_constraint = query
 
-    def _constrain_temporal(self, start, end):
-        """
-        :param start: a datetime instance specifying the start of the temporal
-            constraint.
-        :param end: a datetime instance specifying the end of the temporal
-            constraint.
-
-        """
-        #!TODO: support solr date keywords like "NOW" and "NOW-1DAY"
-        # we will probably need a separate TemporalConstraint object
-        self.temporal_constraint = (start, end)
-
     def _constrain_geospatial(self, lat=None, lon=None, bbox=None, location=None,
                               radius=None, polygon=None):
         self.geospatial_constraint = GeospatialConstraint(lat, lon, bbox, location, radius, polygon)
@@ -277,6 +270,7 @@ def _split_constraints(self, constraints):
         from .connection import query_keyword_type
 
         constraints_split = dict((kw, MultiDict()) for kw in QUERY_KEYWORD_TYPES)
+
         for kw, val in constraints.items():
             constraint_type = query_keyword_type(kw)
             constraints_split[constraint_type][kw] = val
@@ -300,8 +294,8 @@ def _build_query(self):
         query_dict.extend(self.facet_constraints)
 
         #!TODO: encode datetime
-        #start, end = self.temporal_constraint
-        #query_dict.update(start=start, end=end)
+        start, end = self.temporal_constraint
+        query_dict.update(start=start, end=end)
 
         return query_dict

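
Taken together, these context changes store the two timestamps on the context and emit them as the Solr 'start' and 'end' parameters. A short sketch (it calls the private _build_query() purely for illustration; the endpoint is the test index used elsewhere in this PR, and None-valued entries are expected to be dropped when the connection assembles the full query):

from pyesgf.search import SearchConnection

conn = SearchConnection('http://esgf-index1.ceda.ac.uk/esg-search',
                        distrib=False)
ctx = conn.new_context(project='CMIP5',
                       from_timestamp='2100-12-30T23:23:59Z',
                       to_timestamp='2200-01-01T00:00:00Z')

# The context-level query carries the timestamps through unchanged.
query = ctx._build_query()
print query['start'], query['end']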
6 changes: 3 additions & 3 deletions setup.py
@@ -25,10 +25,10 @@
         'Programming Language :: Python :: 2.6',
     ],  # Get strings from http://pypi.python.org/pypi?%3Aaction=list_classifiers
     keywords='',
-    author='Stephen Pascoe',
-    author_email='Stephen.Pascoe@stfc.ac.uk',
+    author='Ag Stephens',
+    author_email='Ag.Stephens@stfc.ac.uk',
     url='http://esgf-pyclient.readthedocs.org',
-    download_url='http://github.org/stephenpascoe/esgf-pyclient',
+    download_url='http://github.com/ESGF/esgf-pyclient',
     license='BSD',
     packages=find_packages(exclude=['ez_setup', 'examples', 'test']),
     include_package_data=True,
2 changes: 1 addition & 1 deletion test/config.py
@@ -3,6 +3,6 @@
 
 """
 
-TEST_SERVICE='http://esgf-node.ipsl.fr/esg-search'
+TEST_SERVICE='http://esgf-index1.ceda.ac.uk/esg-search'
CACHE_DIR = 'url_cache'

4 changes: 2 additions & 2 deletions test/test_connection.py
@@ -28,9 +28,9 @@ def test_get_shard_list():
     shards = conn.get_shard_list()
     #!NOTE: the exact shard list will change depending on the shard replication configuration
     # on the test server
-    assert 'esgf-node.ipsl.fr' in shards
+    assert 'esgf-index2.ceda.ac.uk' in shards
     # IPSL now replicates all non-local shards. Just check it has a few shards
-    assert len(shards['esgf-node.ipsl.fr']) > 4
+    assert len(shards['esgf-index2.ceda.ac.uk']) > 3


def test_url_fixing():
27 changes: 19 additions & 8 deletions test/test_context.py
@@ -63,14 +63,10 @@ def test_context_facet_options():
     conn = SearchConnection(TEST_SERVICE)
     context = conn.new_context(project='CMIP5', model='IPSL-CM5A-LR',
                                ensemble='r1i1p1', experiment='rcp60',
-                               realm='seaIce'
-                               )
+                               realm='seaIce')
 
-    assert context.get_facet_options().keys() == [
-        'product', 'cf_standard_name', 'variable_long_name', 'cmor_table',
-        'time_frequency', 'variable'
-    ]
+    assert context.get_facet_options().keys() == ['data_node', 'cf_standard_name', 'variable_long_name',
+                                                  'cmor_table', 'time_frequency', 'variable']


def test_context_facets3():
@@ -153,7 +149,6 @@ def test_negative_facet():
 
     assert hits1 == hits2 + hits3
 
-
 def test_replica():
     # Test that we can exclude replicas
     # This test assumes the test dataset is replicated
@@ -169,3 +164,19 @@ def test_replica():
                                replica=False)
 
     assert context.hit_count == 1
+
+def test_response_from_bad_parameter():
+    # Test that a bad parameter name raises a useful exception
+    # NOTE: this currently fails because the urllib2 HTTP query is overridden
+    # by the cache handler instead of the usual response; the fix must ensure
+    # cached URL requests raise response exceptions matching urllib2's.
+    conn = SearchConnection(TEST_SERVICE)
+    context = conn.new_context(project='CMIP5', rubbish='nonsense')
+
+    try:
+        context.hit_count
+    except Exception, err:
+        assert str(err).strip() == "Invalid query parameter(s): rubbish"


6 changes: 3 additions & 3 deletions test/test_results.py
@@ -17,7 +17,7 @@ def test_result1():
     results = ctx.search()
 
     r1 = results[0]
-    assert re.match(r'cmip5\.output1\.IPSL\..\|vesg.ipsl.fr', r1.dataset_id)
+    assert re.match(r'cmip5\.output1\.MOHC\..+\|esgf-data1.ceda.ac.uk', r1.dataset_id)

def test_file_context():
conn = SearchConnection(TEST_SERVICE, distrib=False)
@@ -60,8 +60,7 @@ def test_file_list2():
 
     file_results = f_ctx.search()
     for file_result in file_results:
-        print file_result.download_url
-        assert re.match(r'http://vesg.ipsl.fr/thredds/.*\.nc', file_result.download_url)
+        assert re.search(r'ds/.*\.nc', file_result.download_url)

def test_aggregations():
conn = SearchConnection(TEST_SERVICE, distrib=False)
@@ -123,6 +122,7 @@ def test_shards_constrain():
     full_query = f_ctx.connection._build_query(query_dict, shards=f_ctx.shards)
 
     #!TODO: Force fail to see whether shards is passed through.
+    # NOTE: 'shards' is not a key in this dictionary; this check needs a rewrite.
     q_shard = full_query['shards']
     # Check it isn't a ',' separated list
     assert ',' not in q_shard
40 changes: 40 additions & 0 deletions test/test_shard_regex.py
@@ -0,0 +1,40 @@
"""
Test regular expression for matching shard end points.
"""

from pyesgf.search.consts import SHARD_REXP
import re

tests = [
    "https://esgf-test.a.b.c/solr",
    "http://esgf.a.c/solr/data",
    "http://esgs.a.d:80/data/solr",
    "esgf.a.c:80/solr",
    "esgf.a.c/solr",
]

expected = [
    ("https://", "esgf-test.a.b.c", None, "solr"),
    ("http://", "esgf.a.c", None, "solr/data"),
    ("http://", "esgs.a.d", "80", "data/solr"),
    (None, "esgf.a.c", "80", "solr"),
    (None, "esgf.a.c", None, "solr"),
]

keys = ("prefix", "host", "port", "suffix")

# Compile the constant under test rather than a duplicated copy of the pattern.
R = re.compile(SHARD_REXP)


def test_regex():
    for i, test in enumerate(tests):
        match = R.match(test)
        d = match.groupdict()
        values = tuple(d[key] for key in keys)

        assert values == expected[i]


if __name__ == "__main__":
    test_regex()