Skip to content

Commit

Permalink
Merge pull request #755 from simphony/dev
Browse files Browse the repository at this point in the history
Merge release 3.7.0.
  • Loading branch information
kysrpex committed Feb 10, 2022
2 parents cbfa060 + 635eb42 commit 638fc6a
Show file tree
Hide file tree
Showing 12 changed files with 496 additions and 75 deletions.
8 changes: 7 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,13 @@ www.simphony-project.eu).

## Installation

See [https://simphony.readthedocs.io/en/latest/installation.html](https://simphony.readthedocs.io/en/latest/installation.html).
OSP-core is available on PyPI, so it can be installed using `pip`

```shell
pip install osp-core
```

For more detailed instructions, see [https://simphony.readthedocs.io/en/latest/installation.html](https://simphony.readthedocs.io/en/latest/installation.html).

## Visualization of ontologies

Expand Down
2 changes: 1 addition & 1 deletion osp/core/ontology/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -361,7 +361,7 @@ def _get_labels_for_iri(self, iri, lang=None, _return_literal=False,
Args:
iri (rdflib.URIRef): the target iri.
lang (str): retrieve labels only on a speific language.
lang (str): retrieve labels only on a specific language.
_return_literal: return rdflib.Literal instead of str, so that the
language of the labels is known to the caller.
Expand Down
2 changes: 1 addition & 1 deletion osp/core/ontology/namespace_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,7 @@ def _get_entity_name(self, entity_iri, ns_iri):
_return_literal=True,
_return_label_property=True))
if not labels:
logger.warning(f"No label for {entity_iri}")
logger.debug(f"No label for {entity_iri}")
else:
labels = sorted(labels,
key=lambda x:
Expand Down
16 changes: 9 additions & 7 deletions osp/core/ontology/ontology.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ def _overlay_add_cuba_triples(parser: Union[OntologyParser, 'Ontology'],
logger.warning(f"Specified relationship {iri} as "
f"active relationship, which is not "
f"a valid object property in the ontology."
f"If such relationship belongs to another"
f"If such relationship belongs to another "
f"ontology, and such ontology is installed, "
f"then you may safely ignore this warning.")
# This requirement is checked later on in
Expand Down Expand Up @@ -233,12 +233,13 @@ def labels_for_iri(iri):
lang=None,
_return_literal=True)

# Finally check for the duplicate labels.
# Finally, check for the duplicate labels.
subjects = set(subject for subject in graph.subjects()
if in_namespace(subject))
results = sorted(((label.toPython(), label.language), iri)
for iri in subjects for label
in labels_for_iri(iri))
results = set(((label.toPython(), label.language or ''), iri)
for iri in subjects for label
in labels_for_iri(iri))
results = sorted(results)
labels, iris = tuple(result[0] for result in results), \
tuple(result[1] for result in results)
coincidence_search = tuple(i
Expand All @@ -248,12 +249,13 @@ def labels_for_iri(iri):
for i in coincidence_search:
conflicting_labels[labels[i]] |= {iris[i - 1], iris[i]}
if len(conflicting_labels) > 0:
texts = (f'{label[0]}, language {label[1]}: '
texts = (f'{label[0]}, language '
f'{label[1] if label[1] != "" else None}: '
f'{", ".join(tuple(str(iri) for iri in iris))}'
for label, iris in conflicting_labels.items())
raise KeyError(f'The following labels are assigned to more than '
f'one entity in namespace {namespace}; '
f'{"; ".join(texts)}.')
f'{"; ".join(texts)} .')


def _check_namespaces(namespace_iris: Iterable[URIRef],
Expand Down
138 changes: 115 additions & 23 deletions osp/core/session/db/db_wrapper_session.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,19 @@
"""An abstract session containing method useful for all database backends."""

from abc import abstractmethod
from typing import Union
import itertools
import logging
import rdflib
import uuid
from osp.core.utils.general import uid_from_iri, CUDS_IRI_PREFIX
from abc import abstractmethod
from typing import Union

import rdflib

import osp.core.warnings as warning_settings
from osp.core.ontology.namespace_registry import namespace_registry
from osp.core.session.wrapper_session import consumes_buffers, WrapperSession
from osp.core.session.result import returns_query_result
from osp.core.session.buffers import BufferContext, EngineContext
from osp.core.utils.general import uid_from_iri, CUDS_IRI_PREFIX

logger = logging.getLogger(__name__)

Expand All @@ -27,11 +30,12 @@ def commit(self):
try:
root_obj = self._registry.get(self.root)
added, updated, deleted = self._buffers[BufferContext.USER]
if warning_settings.unreachable_cuds_objects:
self._unreachable_warning(root_obj)
self._apply_added(root_obj, added)
self._apply_updated(root_obj, updated)
self._apply_deleted(root_obj, deleted)
self._reset_buffers(BufferContext.USER)
self._unreachable_warning(root_obj)
self._commit()
except Exception as e:
self._rollback_transaction()
Expand Down Expand Up @@ -176,24 +180,112 @@ def _unreachable_warning(self, root_obj: Union[rdflib.URIRef, uuid.UUID]):
raises a warning that lists some of the unreachable CUDS objects.
Args:
root_obj (Union[URIRef, UUID]): The root object with respect to
which objects are deemed reachable or unreachable.
root_obj: The root object with respect to which objects are
deemed reachable or unreachable.
"""
unreachable = self._registry._get_not_reachable(root_obj, rel=None)
large_dataset_warning = LargeDatasetWarning()
unreachable, reachable = self._registry._get_not_reachable(
root_obj, rel=None, return_reachable=True,
warning=large_dataset_warning
)

# Warn about unreachable CUDS
max_cuds_on_warning = 5
if len(unreachable) > 0:
warning = "Some CUDS objects are unreachable " \
"from the wrapper object: " \
"{cuds}{more}." \
"\n" \
"If you want to be able to retrieve those CUDS " \
"objects later, either add them to the wrapper object " \
"or to any other CUDS that is reachable from it." \
.format(cuds=', '.join(str(x) for x in itertools
.islice(unreachable,
max_cuds_on_warning)),
more=" and " + str(len(unreachable)
- max_cuds_on_warning)
+ " more" if len(unreachable) > 5
else "")
logger.warning(warning)
unreachable_cuds_warning = (
"Some CUDS objects are unreachable from the wrapper object: "
"{cuds}{more}. \n"
"If you want to be able to retrieve those CUDS objects later, "
"either add them to the wrapper object or to any other CUDS "
"that is reachable from it."
).format(
cuds=', '.join(str(x) for x in itertools
.islice(unreachable, max_cuds_on_warning)),
more=" and " + str(len(unreachable) - max_cuds_on_warning)
+ " more" if len(unreachable) > 5 else "")
# A filter is applied to the logger that attaches the warning
# type to the log records.
logger_filter = UnreachableCUDSWarningFilter()
logger.addFilter(logger_filter)
logger.warning(unreachable_cuds_warning)
logger.removeFilter(logger_filter)

# Inform the large dataset warning that the unreachable CUDS
# warning was raised (so that it changes its text).
large_dataset_warning.unreachable_cuds_warning = True

# Warn about large datasets and recommend disabling the unreachable
# CUDS warning for large datasets.
if len(reachable) + len(unreachable) >= \
warning_settings.unreachable_cuds_objects_large_dataset_size:
# Recommend disabling the warning for large datasets.
large_dataset_warning.warn()


class UnreachableCUDSWarning(UserWarning):
    """Warning category for CUDS objects that cannot be reached.

    Emitted by `DbWrapperSession._unreachable_warning` while a commit
    operation is being performed.
    """


class UnreachableCUDSWarningFilter(logging.Filter):
    """Logging filter that tags records with `UnreachableCUDSWarning`."""

    def filter(self, record):
        """Annotate `record` with the warning class and let it through."""
        record.warning_class = UnreachableCUDSWarning
        return True


class LargeDatasetWarning(UserWarning):
    """Warning category for operations on a large dataset.

    Used by `DbWrapperSession._unreachable_warning` during the commit
    operation.
    """

    # True once `warn` has emitted its message (it fires only once per
    # instance).
    warned: bool = False
    # Set to True when the unreachable-CUDS warning was already shown, so
    # the message refers back to it instead of to "a warning".
    unreachable_cuds_warning: bool = False

    def warn(self) -> None:
        """Emit the large-dataset warning through the module logger.

        The message is logged at most once per instance; create a fresh
        `LargeDatasetWarning` if you need to emit it again.
        """
        if self.warned:
            return

        # Advise disabling the `UnreachableCUDSWarning` for large
        # datasets, since computing reachability is expensive.
        template = (
            "You are working with a large dataset. When committing "
            "changes, OSP-core looks for objects that are unreachable "
            "from the wrapper object to generate {reference_to_warning}. "
            "Generating such warning is very expensive in computational "
            "terms when small changes are applied to existing, "
            "large datasets. You will notice that the changes may take a "
            "lot of time to be committed. Please turn off such warning "
            "when working with large datasets. You can turn off the "
            "warning by running `import osp.core.warnings as "
            "warning_settings; "
            "warning_settings.unreachable_cuds_objects = False`.")
        if self.unreachable_cuds_warning:
            reference = "the previous warning"
        else:
            reference = "a warning"
        message = template.format(reference_to_warning=reference)
        # Temporarily attach a filter so the emitted log record carries
        # the warning class.
        record_filter = LargeDatasetWarningFilter()
        logger.addFilter(record_filter)
        logger.warning(message)
        logger.removeFilter(record_filter)
        self.warned = True


class LargeDatasetWarningFilter(logging.Filter):
    """Logging filter that tags records with `LargeDatasetWarning`."""

    def filter(self, record):
        """Annotate `record` with the warning class and let it through."""
        record.warning_class = LargeDatasetWarning
        return True
76 changes: 63 additions & 13 deletions osp/core/session/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
from rdflib import URIRef
import logging

import osp.core.warnings as warning_settings

logger = logging.getLogger(__name__)


Expand Down Expand Up @@ -58,7 +60,8 @@ def get(self, uid):
message = '{!r} is not a proper uid'
raise ValueError(message.format(uid))

def get_subtree(self, root, rel=None, skip=None):
def get_subtree(self, root, subtree=None, rel=None, skip=None,
warning=None):
"""Get all the elements in the subtree rooted at given root.
Only use the given relationship for traversal.
Expand All @@ -67,23 +70,59 @@ def get_subtree(self, root, rel=None, skip=None):
root (Union[UUID, URIRef, Cuds]): The root of the subtree.
rel (Relationship, optional): The relationship used for traversal.
Defaults to None. Defaults to None.
subtree (Set[Cuds]): Currently calculated subtree (this is a
recursive algorithm).
skip (Set[Cuds], optional): The elements to skip. Defaults to None.
Defaults to None.
warning (LargeDatasetWarning, optional): Raise a
`LargeDatasetWarning` when the subtree is large. When `None`,
no warning is raised. If you wish to raise the warning, a
`LargeDatasetWarning` object must be provided.
Returns:
Set[Cuds]: The set of elements in the subtree rooted in the given
uid.
"""
from osp.core.cuds import Cuds
skip = skip or set()
if not isinstance(root, Cuds):
if isinstance(root, (UUID, URIRef)):
root = super().__getitem__(root)
assert root.uid in self
subtree = {root}
for child in root.iter(rel=rel):
if child not in (skip | subtree):
subtree |= self.get_subtree(child.uid, rel,
skip=(skip | subtree))
skip = skip or set() | {root}
skip |= {root}
subtree = subtree or {root}

subclasses = set() if rel is None else rel.subclasses
subclass_check = (lambda r: True) \
if not subclasses else (lambda r: r in subclasses)
"""Checks whether relationship `x` should be considered.
- When no `rel` is provided, `subclass_check` should always return
True, as all relationships should be considered.
- When `rel` is provided, it should return true only if the
relationship `x` is a subclass of the provided relationship (`rel`).
"""

# Load neighbors connected through the relationship
filtered_neighbors = (
neighbor
for r, dict_target in root._neighbors.items()
if subclass_check(r)
for neighbor in dict_target
)
filtered_neighbors = set(root.session.load(*filtered_neighbors))

subtree |= filtered_neighbors

# Optional: raise a `LargeDatasetWarning` if the subtree is too large.
if warning is not None and len(subtree) \
> warning_settings \
.unreachable_cuds_objects_large_dataset_size:
warning.warn()
warning = None

for neighbor in filter(lambda x: x not in skip, filtered_neighbors):
self.get_subtree(neighbor, subtree=subtree, rel=rel, skip=skip,
warning=warning)
return subtree

def prune(self, *roots, rel=None):
Expand All @@ -103,7 +142,11 @@ def prune(self, *roots, rel=None):
super().__delitem__(x.uid)
return not_reachable

def _get_not_reachable(self, *roots, rel=None):
def _get_not_reachable(self,
*roots,
rel=None,
return_reachable=False,
warning=None):
"""Get all elements in the registry that are not reachable.
Use the given rel for traversal.
Expand All @@ -113,22 +156,29 @@ def _get_not_reachable(self, *roots, rel=None):
from these root elements.
rel (Relationship, optional): Only use this relationship for
traversal. Defaults to None.
return_reachable (bool): Returns also the uids of the reachable
cuds.
Returns:
List[Cuds]: The set of non reachable elements.
Union[List[Cuds],
Tuple[List[Cuds], Set[Union[UUID, URIRef]]]]: Either a
list of the unreachable CUDS when `return_reachable` is False
or a tuple whose first element is such list, and second
element a set with the uids of the reachable cuds.
"""
# Get all reachable Cuds objects
reachable = set()
for root in roots:
reachable |= self.get_subtree(root, rel=rel, skip=reachable)
reachable |= self.get_subtree(
root, rel=rel, skip=reachable, warning=warning)
reachable_uids = set([r.uid for r in reachable])

# Get all the Cuds objects that are not reachable
delete = list()
for uid in self.keys():
if uid not in reachable_uids:
delete.append(super().__getitem__(uid))
return delete
return delete if not return_reachable else (delete, reachable_uids)

def reset(self):
"""Delete the contents of the registry."""
Expand Down
6 changes: 6 additions & 0 deletions osp/core/session/transport/communication_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,12 @@ def __init__(self, uri, handle_response, **kwargs):
"""
self.uri = uri
self.kwargs = kwargs
# The default `ping_timeout` is 20s. The pings are not sent during a
# transfer. Thus, if the transfer takes more than 20s, then the
# default value causes the websockets connection to close
# unexpectedly. Hence, we chose to never close the connection due to
# ping timeouts unless the user wishes to do so.
self.kwargs['ping_timeout'] = self.kwargs.get('ping_timeout', None)
self.handle_response = handle_response
self.websocket = None

Expand Down

1 comment on commit 638fc6a

@kysrpex
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Performance Alert ⚠️

Possible performance regression was detected for benchmark.
Benchmark result of this commit is worse than the previous benchmark result exceeding threshold 1.50.

Benchmark suite Current: 638fc6a Previous: 635eb42 Ratio
benchmark_cuds_api.py::benchmark_cuds_create 385.5192011685611 iter/sec (stddev: 0.006598974796613531) 607.6365250140163 iter/sec (stddev: 0.00425757155642059) 1.58
benchmark_cuds_api.py::benchmark_add_default 1102.8966316109766 iter/sec (stddev: 0.00018802817329449106) 1756.456883044433 iter/sec (stddev: 0.00008497178955291957) 1.59
benchmark_cuds_api.py::benchmark_get_byuiduriref 288.3646163966868 iter/sec (stddev: 0.0009759221792346276) 465.09750427330164 iter/sec (stddev: 0.00025552976934833217) 1.61
benchmark_cuds_api.py::benchmark_cuds_is_a 6804.397191986568 iter/sec (stddev: 0.00010118231278397455) 11552.723731376334 iter/sec (stddev: 0.00005475464459677695) 1.70
benchmark_cuds_api.py::benchmark_cuds_oclass 8653.037094885041 iter/sec (stddev: 0.000021429817896318477) 15647.727346027857 iter/sec (stddev: 0.000015393623801881135) 1.81
benchmark_cuds_api.py::benchmark_cuds_uid 13328.58196046166 iter/sec (stddev: 0.000019742996625737058) 22654.018897727678 iter/sec (stddev: 0.000017784265023640967) 1.70
benchmark_cuds_api.py::benchmark_cuds_iri 12089.186181820303 iter/sec (stddev: 0.000019560577453230282) 18885.624277889903 iter/sec (stddev: 0.00002070254024985459) 1.56

This comment was automatically generated by workflow using github-action-benchmark.

CC: @yoavnash @pablo-de-andres @kysrpex

Please sign in to comment.