Merge pull request #736 from simphony/dev

Merge release 3.6.0.
simphony · Dec 9, 2021 · commit e1383f8
2 parents: 89aff8d + f1e4796

Showing 6 changed files with 168 additions and 91 deletions.
diff --git a/README.md b/README.md
@@ -1,17 +1,12 @@
 [![Documentation Status](https://readthedocs.org/projects/simphony/badge/?version=latest)](https://simphony.readthedocs.io/en/latest/?badge=latest)
 ![](https://github.com/simphony/osp-core/workflows/CI/badge.svg)
 
-## Note
-
-For applications where performance is important, we recommend v3.4.2 
-The performance optimizations will be done as a next step to v3.5.X
-
 # OSP core
 
 Native implementation of the core cuds object and the class generation
 by the SimPhoNy team at Fraunhofer IWM. Builds up on the previous
 version, simphony-common (SimPhoNy, EU FP7 Project (Nr. 604005)
-www.simphony-project.eu)
+www.simphony-project.eu).
 
 ## Requirements
 

diff --git a/osp/core/cuds.py b/osp/core/cuds.py
@@ -143,7 +143,7 @@ def session(self) -> Session:
     def oclasses(self):
         """Get the ontology classes of this CUDS object."""
         result = list()
-        for s, p, o in self._graph.triples((self.iri, RDF.type, None)):
+        for o in self._graph.objects(self.iri, RDF.type):
             r = from_iri(o, raise_error=False)
             if r is not None:
                 result.append(r)
@@ -630,12 +630,9 @@ def _add_direct(self, cuds_object, rel):
                 add.
         """
         # First element, create set
-        if rel not in self._neighbors.keys():
-            self._neighbors[rel] = \
-                {cuds_object.uid: cuds_object.oclasses}
-        # Element not already there
-        elif cuds_object.uid not in self._neighbors[rel]:
-            self._neighbors[rel][cuds_object.uid] = cuds_object.oclasses
+        if rel not in self._neighbors:
+            self._neighbors[rel] = dict()
+        self._neighbors[rel][cuds_object.uid] = cuds_object.oclasses
 
     def _add_inverse(self, cuds_object, rel):
         """Add the inverse relationship from self to cuds_object.
@@ -693,7 +690,7 @@ def _get(self, *uids, rel=None, oclass=None, return_mapping=False):
         self.session._notify_read(self)
         # consider either given relationship and subclasses
         # or all relationships.
-        consider_relationships = set(self._neighbors.keys())
+        consider_relationships = set(self._neighbors)
         if rel:
             consider_relationships &= set(rel.subclasses)
         consider_relationships = list(consider_relationships)
@@ -733,35 +730,24 @@ def _get_by_uids(self, uids, relationships, return_mapping):
                 uids (+ Mapping from uids to relationships, which
                 connect self to the respective Cuds object.)
         """
-        not_found_uids = dict(enumerate(uids)) if uids \
-            else None
+        collected_uid = [None] * len(uids)
         relationship_mapping = dict()
-        for relationship in relationships:
-
-            # uids are given.
-            # Check which occur as object of current relation.
-            found_uids_indexes = set()
-
-            # we need to iterate over all uids for every
-            # relationship if we compute a mapping
-            iterator = enumerate(uids) if relationship_mapping \
-                else not_found_uids.items()
-            for i, uid in iterator:
-                if uid in self._neighbors[relationship]:
-                    found_uids_indexes.add(i)
-                    if uid not in relationship_mapping:
-                        relationship_mapping[uid] = set()
-                    relationship_mapping[uid].add(relationship)
-            for i in found_uids_indexes:
-                if i in not_found_uids:
-                    del not_found_uids[i]
-
-        collected_uid = [(uid if i not in not_found_uids
-                          else None)
-                         for i, uid in enumerate(uids)]
-        if return_mapping:
-            return collected_uid, relationship_mapping
-        return collected_uid
+        uids_cache = {relationship: set(self._neighbors[relationship])
+                      for relationship in relationships}
+        for i, uid in enumerate(uids):
+            relationship_set = {relationship
+                                for relationship in relationships
+                                if uid in uids_cache[relationship]}
+            # The following line was a performance hog, and was therefore
+            #  replaced by the one above.
+            #                   if uid in self._neighbors[relationship]}
+            if relationship_set:
+                collected_uid[i] = uid
+                relationship_mapping[uid] = relationship_set
+
+        return collected_uid \
+            if not return_mapping else \
+            (collected_uid, relationship_mapping)
 
     def _get_by_oclass(self, oclass, relationships, return_mapping):
         """Get the cuds_objects with given oclass.
@@ -816,8 +802,7 @@ def _load_cuds_objects(self, uids):
         Yields:
             Cuds: The loaded cuds_objects
         """
-        without_none = [uid for uid in uids
-                        if uid is not None]
+        without_none = filter(None, uids)
         cuds_objects = self.session.load(*without_none)
         for uid in uids:
             if uid is None:
@@ -873,11 +858,11 @@ def __getattr__(self, name):
             The value of the attribute: Any
         """
         try:
-            attr = self._get_attribute_by_argname(name)
+            identifier = self._get_attribute_identifier_by_argname(name)
             if self.session:
                 self.session._notify_read(self)
             value = self._rdflib_5_inplace_modification_prevention_filter(
-                self._graph.value(self.iri, attr.iri).toPython(), attr)
+                self._graph.value(self.iri, identifier).toPython(), identifier)
             return value
         except AttributeError as e:
             if (  # check if user calls session's methods on wrapper
@@ -901,6 +886,14 @@ def _get_attribute_by_argname(self, name):
                 return attr
         raise AttributeError(name)
 
+    def _get_attribute_identifier_by_argname(self, name):
+        """Get the identifier of an attribute of this CUDS by argname."""
+        for oclass in self.oclasses:
+            identifier = oclass.get_attribute_identifier_by_argname(name)
+            if identifier is not None:
+                return identifier
+        raise AttributeError(name)
+
     @staticmethod
     def _rdflib_5_inplace_modification_prevention_filter(
             value: Any, attribute: OntologyAttribute) -> Any:

diff --git a/osp/core/neighbor_dict.py b/osp/core/neighbor_dict.py
@@ -30,6 +30,12 @@ def __iter__(self):
         return self._iter()
         # TODO maybe it's more secure to notify read after each iteration step?
 
+    def __contains__(self, item):
+        """Notify on containment check."""
+        if self.cuds_object.session:
+            self.cuds_object.session._notify_read(self.cuds_object)
+        return self._contains(item)
+
     def __getitem__(self, key):
         """Notify on read."""
         if not self.key_check(key):
@@ -143,16 +149,22 @@ def __bool__(self):
                 return True
         return False
 
+    def _contains(self, item: OntologyRelationship):
+        """Checks if an item belongs to the dictionary."""
+        return (self.cuds_object.iri, item.iri, None) in self.graph
+
     def _iter(self):
         """Iterate over the dictionary."""
-        predicates = set([
-            p for _, p, _ in self.graph.triples((self.cuds_object.iri,
-                                                 None, None))
-        ])
+        predicates = set(self.graph.predicates(self.cuds_object.iri, None))
+        # Using set(..) instead of the iterator directly makes the code 2x
+        #  faster.
         for p in predicates:
-            if (p, rdflib.RDF.type, rdflib.OWL.ObjectProperty) \
-                    in namespace_registry._graph:
-                yield from_iri(p)
+            try:
+                obj = from_iri(p)
+                if isinstance(obj, OntologyRelationship):
+                    yield obj
+            except KeyError:
+                pass
 
 
 class NeighborDictTarget(NeighborDict):
@@ -210,6 +222,10 @@ def _iter(self):
         Yields:
             UUID: The UUIDs of the CUDS object related with self.rel.
         """
-        for s, p, o in self.graph.triples((self.cuds_object.iri,
-                                           self.rel.iri, None)):
+        for o in self.graph.objects(self.cuds_object.iri, self.rel.iri):
             yield uid_from_iri(o)
+
+    def _contains(self, item):
+        """Checks if an item belongs to the dictionary."""
+        return (self.cuds_object.iri, self.rel.iri, iri_from_uid(item)) \
+            in self.graph
diff --git a/osp/core/ontology/oclass.py b/osp/core/ontology/oclass.py
@@ -14,6 +14,25 @@
 BLACKLIST = {OWL.Nothing, OWL.Thing,
              OWL.NamedIndividual}
 
+# CACHE Introduced because getting URIRef terms from the namespaces is
+#  computationally expensive.
+CACHE = {
+    'cuba:_default': rdflib_cuba._default,
+    'cuba:_default_attribute': rdflib_cuba._default_attribute,
+    'cuba:_default_value': rdflib_cuba._default_value,
+    'owl:DatatypeProperty': OWL.DatatypeProperty,
+    'owl:Restriction': OWL.Restriction,
+    'owl:allValuesFrom': OWL.allValuesFrom,
+    'owl:cardinality': OWL.cardinality,
+    'owl:minCardinality': OWL.minCardinality,
+    'owl:hasValue': OWL.hasValue,
+    'owl:someValuesFrom': OWL.someValuesFrom,
+    'owl:onProperty': OWL.onProperty,
+    'rdf:type': RDF.type,
+    'rdfs:domain': RDFS.domain,
+    'rdfs:subClassOf': RDFS.subClassOf,
+}
+
 
 class OntologyClass(OntologyEntity):
     """A class defined in the ontology."""
@@ -38,7 +57,7 @@ def attributes(self):
         """Get all the attributes of this oclass.
 
         Returns:
-            Dict[OntologyAttribute, str]: Mapping from attribute to default
+            Dict[OntologyAttribute, Any]: Mapping from attribute to default
         """
         attributes = dict()
         for superclass in self.superclasses:
@@ -85,8 +104,8 @@ def _compute_axioms(self, iri, rdflib_predicate):
                 connected to axioms (subclass or equivalentClass).
         """
         self._cached_axioms = self._cached_axioms or []
-        triple = (iri, rdflib_predicate, None)
-        for _, _, o in self.namespace._graph.triples(triple):
+        for o in self._namespace_registry._graph.objects(iri,
+                                                         rdflib_predicate):
             if not isinstance(o, BNode):
                 continue
             try:
@@ -105,33 +124,20 @@ def _get_attributes(self, iri):
         Returns:
             Dict[OntologyAttribute, str]: Mapping from attribute to default
         """
-        graph = self._namespace_registry._graph
         attributes = dict()
-
-        blacklist = [OWL.topDataProperty, OWL.bottomDataProperty]
         # Case 1: domain of Datatype
-        triple = (None, RDFS.domain, iri)
-        for a_iri, _, _ in self.namespace._graph.triples(triple):
-            triple = (a_iri, RDF.type, OWL.DatatypeProperty)
-            if triple not in graph or isinstance(a_iri, BNode) \
-                    or a_iri in blacklist:
-                continue
-            a = self.namespace._namespace_registry.from_iri(a_iri)
+        for a_iri in self._get_attributes_identifiers_from_domain(iri):
+            a = self._namespace_registry.from_iri(a_iri)
             default = self._get_default(a_iri, iri)
             attributes[a] = (default, False, None)
 
         # Case 2: axioms
-        triple = (iri, RDFS.subClassOf, None)
-        for _, _, o in self.namespace._graph.triples(triple):
-            if (o, RDF.type, OWL.Restriction) not in graph:
-                continue
-            a_iri = graph.value(o, OWL.onProperty)
-            triple = (a_iri, RDF.type, OWL.DatatypeProperty)
-            if triple not in graph or isinstance(a_iri, BNode):
-                continue
-            a = self.namespace._namespace_registry.from_iri(a_iri)
+        graph = self._namespace_registry._graph
+        for a_iri, o in self._get_attributes_identifiers_from_axioms(
+                iri, return_restriction=True):
+            a = self._namespace_registry.from_iri(a_iri)
             cuba_default = self._get_default(a_iri, iri)
-            restriction_default = graph.value(o, OWL.hasValue)
+            restriction_default = graph.value(o, CACHE['owl:hasValue'])
             default = cuba_default or restriction_default
             dt, obligatory = self._get_datatype_for_restriction(o)
             obligatory = default is None and obligatory
@@ -140,17 +146,46 @@ def _get_attributes(self, iri):
         # TODO more cases
         return attributes
 
+    def _get_attributes_identifiers(self, iri):
+        yield from self._get_attributes_identifiers_from_domain(iri)
+        yield from self._get_attributes_identifiers_from_axioms(iri)
+
+    def _get_attributes_identifiers_from_domain(self, iri):
+        # Case 1: domain of Datatype
+        graph = self._namespace_registry._graph
+        blacklist = [OWL.topDataProperty, OWL.bottomDataProperty]
+        for a_iri in graph.subjects(CACHE['rdfs:domain'], iri):
+            if ((a_iri, CACHE['rdf:type'], CACHE['owl:DatatypeProperty'])
+                    not in graph
+                    or isinstance(a_iri, BNode)
+                    or a_iri in blacklist):
+                continue
+            yield a_iri
+
+    def _get_attributes_identifiers_from_axioms(self, iri,
+                                                return_restriction=False):
+        # Case 2: axioms
+        graph = self._namespace_registry._graph
+        for o in graph.objects(iri, CACHE['rdfs:subClassOf']):
+            if (o, CACHE['rdf:type'], CACHE['owl:Restriction']) not in graph:
+                continue
+            a_iri = graph.value(o, CACHE['owl:onProperty'])
+            if (a_iri, CACHE['rdf:type'], CACHE['owl:DatatypeProperty'])\
+                    not in graph or isinstance(a_iri, BNode):
+                continue
+            yield a_iri if not return_restriction else (a_iri, o)
+
     def _get_datatype_for_restriction(self, r):
         obligatory = False
         dt = None
-        g = self.namespace._graph
+        g = self._namespace_registry._graph
 
-        dt = g.value(r, OWL.someValuesFrom)
+        dt = g.value(r, CACHE['owl:someValuesFrom'])
         obligatory = dt is not None
-        dt = dt or g.value(r, OWL.allValuesFrom)
-        dt = dt or g.value(r, OWL.hasValue)
-        obligatory = obligatory or (r, OWL.cardinality) != 0
-        obligatory = obligatory or (r, OWL.minCardinality) != 0
+        dt = dt or g.value(r, CACHE['owl:allValuesFrom'])
+        dt = dt or g.value(r, CACHE['owl:hasValue'])
+        obligatory = obligatory or (r, CACHE['owl:cardinality']) != 0
+        obligatory = obligatory or (r, CACHE['owl:minCardinality']) != 0
         return dt, obligatory
 
     def _get_default(self, attribute_iri, superclass_iri):
@@ -164,12 +199,12 @@ def _get_default(self, attribute_iri, superclass_iri):
         Returns:
             Any: the default
         """
-        triple = (superclass_iri, rdflib_cuba._default, None)
-        for _, _, bnode in self.namespace._graph.triples(triple):
-            x = (bnode, rdflib_cuba._default_attribute, attribute_iri)
-            if x in self.namespace._graph:
-                return self.namespace._graph.value(bnode,
-                                                   rdflib_cuba._default_value)
+        for bnode in self._namespace_registry._graph.objects(
+                superclass_iri, CACHE['cuba:_default']):
+            x = (bnode, CACHE['cuba:_default_attribute'], attribute_iri)
+            if x in self._namespace_registry._graph:
+                return self._namespace_registry._graph.value(
+                    bnode, CACHE['cuba:_default_value'])
 
     def get_attribute_by_argname(self, name):
         """Get the attribute object with the argname of the object.
@@ -195,6 +230,36 @@ def get_attribute_by_argname(self, name):
                 )
                 return attribute
 
+    def get_attribute_identifier_by_argname(self, name):
+        """Get the attribute identifier with the argname of the object.
+
+        Args:
+            name (str): The argname of the attribute
+
+        Returns:
+            Identifier: The attribute identifier.
+        """
+        for superclass in self.superclasses:
+            for identifier in self._get_attributes_identifiers(superclass.iri):
+                attribute_name = self._namespace_registry._get_entity_name(
+                    identifier,
+                    self._namespace_registry._get_namespace_name_and_iri(
+                        identifier)[1])
+                if attribute_name == name:
+                    return identifier
+                elif attribute_name.lower() == name:
+                    logger.warning(
+                        f"Attribute {attribute_name} is referenced "
+                        f"with '{attribute_name.lower()}'. "
+                        f"Note that you must match the case of the definition "
+                        f"in the ontology in future releases. Additionally, "
+                        f"entity names defined in YAML ontology are no longer "
+                        f"required to be ALL_CAPS. You can use the "
+                        f"yaml2camelcase commandline tool to transform entity "
+                        f"names to CamelCase."
+                    )
+                    return identifier
+
     def _get_attributes_values(self, kwargs, _force):
         """Get the cuds object's attributes from the given kwargs.