Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a feature to automatically annotate relations #1254

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,6 @@ config.py
# Standard locations of data and server temporary files
data/
work/

venv/
.idea/
44 changes: 44 additions & 0 deletions client/src/annotator_ui.js
Original file line number Diff line number Diff line change
Expand Up @@ -2102,6 +2102,16 @@ var AnnotatorUI = (function($, window, undefined) {
dispatcher.post('ajax', [tagOptions, 'edited']);
}

// Posts an 'ajax' request with action 'link' for the current collection and
// document, naming the relation tagger to run via taggerId. The request is
// dispatched with the 'edited' callback name.
var tagRelationCurrentDocument = function(taggerId) {
  var linkRequest = {};
  linkRequest.action = 'link';
  linkRequest.collection = coll;
  linkRequest['document'] = doc;
  linkRequest.tagger = taggerId;
  dispatcher.post('ajax', [linkRequest, 'edited']);
};

var setupTaggerUI = function(response) {
var taggers = response.ner_taggers || [];
$taggerButtons = $('#tagger_buttons').empty();
Expand Down Expand Up @@ -2135,6 +2145,39 @@ var AnnotatorUI = (function($, window, undefined) {
}
}

// Builds the relation-tagger ("linker") buttons of the data dialog from the
// collection information sent by the server.
//
// response.re_taggers is expected to be a list of tuples whose first three
// entries are the tagger ID, its display name and its model label; a missing
// list is treated as empty. Each valid entry gets one row in
// '#rel_tagger_buttons' with a button that calls
// tagRelationCurrentDocument(<tagger ID>). Entries lacking any of the three
// fields are reported through the 'messages' dispatcher event and skipped.
// When no button was created, the '#auto_rel_tagging_fieldset' is hidden and
// '#no_rel_tagger_message' is shown; otherwise the other way around.
var setupLinkerUI = function(response) {
  var linkers = response.re_taggers || [];
  // Deliberately a local: assigning to an undeclared $taggerButtons would
  // write to whatever variable of that name is in scope (setupTaggerUI uses
  // the same name) or create an implicit global.
  var $linkerButtons = $('#rel_tagger_buttons').empty();
  $.each(linkers, function(linkerNo, linker) {
    // expect a tuple with ID, name, model, and URL
    var linkerId = linker[0];
    var linkerName = linker[1];
    var linkerModel = linker[2];
    if (!linkerId || !linkerName || !linkerModel) {
      dispatcher.post('messages', [[['Invalid tagger specification received from server', 'error']]]);
      return true; // continue
    }
    var $row = $('<div class="optionRow"/>');
    var $label = $('<span class="optionLabel">'+Util.escapeHTML(linkerName)+'</span>');
    var $button = $('<input id="tag_'+Util.escapeHTML(linkerId)+'_button" type="button" value="'+Util.escapeHTML(linkerModel)+'" tabindex="-1" title="Automatically tag the current document."/>');
    $row.append($label).append($button);
    $linkerButtons.append($row);
    $button.click(function(evt) {
      tagRelationCurrentDocument(linkerId);
    });
  });
  // query the created buttons once and reuse the result
  var $inputs = $linkerButtons.find('input');
  $inputs.button();
  // if nothing was set up, hide the whole fieldset and show
  // a message to this effect, else the other way around
  if ($inputs.length === 0) {
    $('#auto_rel_tagging_fieldset').hide();
    $('#no_rel_tagger_message').show();
  } else {
    $('#auto_rel_tagging_fieldset').show();
    $('#no_rel_tagger_message').hide();
  }
};

// recursively traverses type hierarchy (entity_types or
// event_types) and stores normalizations in normDbsByType.
var rememberNormDbsForType = function(types) {
Expand Down Expand Up @@ -2810,6 +2853,7 @@ var AnnotatorUI = (function($, window, undefined) {
on('dataReady', rememberData).
on('collectionLoaded', rememberSpanSettings).
on('collectionLoaded', setupTaggerUI).
on('collectionLoaded', setupLinkerUI).
on('collectionLoaded', setupNormalizationUI).
on('spanAndAttributeTypesLoaded', spanAndAttributeTypesLoaded).
on('newSourceData', onNewSourceData).
Expand Down
12 changes: 12 additions & 0 deletions diff.xhtml
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,18 @@
<div style="color:gray; font-size:80%; text-align:center; margin:1em">(No tools set up. Please contact server maintainer if needed.)</div>
</fieldset>
</div>
<!-- Data dialog automatic relations annotation section -->
<!-- The ids below are looked up by setupLinkerUI in client/src/annotator_ui.js:
     it fills #rel_tagger_buttons and shows either #auto_rel_tagging_fieldset
     or #no_rel_tagger_message. -->
<div id="auto_rel_tagging_login_control" class="login">
  <fieldset id="auto_rel_tagging_fieldset" class="small-buttons">
    <legend>Automatic relation annotation</legend>
    <div class="optionRow">Automatically tag relations on current document</div>
    <div id="rel_tagger_buttons"/>
  </fieldset>
  <!-- id must be "no_rel_tagger_message": that is the selector the script
       toggles; a different id leaves this fallback permanently hidden -->
  <fieldset id="no_rel_tagger_message" style="display:none">
    <legend>Automatic relation annotation</legend>
    <div style="color:gray; font-size:80%; text-align:center; margin:1em">(No tools set up. Please contact server maintainer if needed.)</div>
  </fieldset>
</div>
<!-- Data dialog import section -->
<fieldset class="login small-buttons">
<legend>Import</legend>
Expand Down
12 changes: 12 additions & 0 deletions index.xhtml
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,18 @@
<div style="color:gray; font-size:80%; text-align:center; margin:1em">(No tools set up. Please contact server administrator if needed.)</div>
</fieldset>
</div>
<!-- Data dialog automatic relations annotation section -->
<!-- The ids in this section are the ones setupLinkerUI (client/src/annotator_ui.js)
     selects: '#rel_tagger_buttons', '#auto_rel_tagging_fieldset' and
     '#no_rel_tagger_message'. -->
<div id="auto_rel_tagging_login_control" class="login">
<!-- Shown by the script when at least one relation tagger button was created -->
<fieldset id="auto_rel_tagging_fieldset" class="small-buttons">
<legend>Automatic relation annotation</legend>
<div class="optionRow">Automatically tag relations on current document</div>
<!-- Emptied and refilled by the script with one button row per relation tagger -->
<div id="rel_tagger_buttons"/>
</fieldset>
<!-- Fallback, initially hidden; shown by the script when no button was created -->
<fieldset id="no_rel_tagger_message" style="display:none">
<legend>Automatic relation annotation</legend>
<div style="color:gray; font-size:80%; text-align:center; margin:1em">(No tools set up. Please contact server maintainer if needed.)</div>
</fieldset>
</div>
<!-- Data dialog import section -->
<fieldset class="login small-buttons">
<legend>Import</legend>
Expand Down
4 changes: 3 additions & 1 deletion server/src/dispatch.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
from search import search_text, search_entity, search_event, search_relation, search_note
from predict import suggest_span_types
from undo import undo
from tag import tag
from tag import tag, link
from delete import delete_document, delete_collection
from norm import norm_get_name, norm_search, norm_get_data

Expand Down Expand Up @@ -90,6 +90,7 @@ def logging_no_op(collection, document, log):

'undo': undo,
'tag': tag,
'link': link,

'deleteDocument': delete_document,
'deleteCollection': delete_collection,
Expand Down Expand Up @@ -134,6 +135,7 @@ def logging_no_op(collection, document, log):
'searchNoteInCollection',

'tag',
'link',
))

# Sanity check
Expand Down
7 changes: 7 additions & 0 deletions server/src/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -405,6 +405,9 @@ def get_annotator_config(directory):
# where most annotators are expected to be human. Rethink.
return ProjectConfiguration(directory).get_annotator_config()

def get_linker_config(directory):
    """Return the linker (relation tagger) configuration for `directory`.

    Thin wrapper that builds a ProjectConfiguration for the directory and
    returns whatever its get_linker_config() method yields.
    """
    project_conf = ProjectConfiguration(directory)
    return project_conf.get_linker_config()

def assert_allowed_to_read(doc_path):
if not allowed_to_read(doc_path):
raise AccessDeniedError # Permission denied by access control
Expand Down Expand Up @@ -586,6 +589,9 @@ def get_directory_information(collection):
# fill in NER services, if any
ner_taggers = get_annotator_config(real_dir)

# fill in RE services, if any
re_taggers = get_linker_config(real_dir)

return _inject_annotation_type_conf(real_dir, json_dic={
'items': combolist,
'header' : doclist_header,
Expand All @@ -597,6 +603,7 @@ def get_directory_information(collection):
'normalization_config' : normalization_config,
'annotation_logging': ann_logging,
'ner_taggers': ner_taggers,
're_taggers': re_taggers
})

class UnableToReadTextFile(ProtocolError):
Expand Down
22 changes: 19 additions & 3 deletions server/src/projectconfig.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,11 +58,12 @@ class InvalidProjectConfigException(Exception):
# tools config section name constants
SEARCH_SECTION = "search"
ANNOTATORS_SECTION = "annotators"
LINKERS_SECTION = "linkers"
DISAMBIGUATORS_SECTION = "disambiguators"
NORMALIZATION_SECTION = "normalization"

__expected_tools_sections = (OPTIONS_SECTION, SEARCH_SECTION, ANNOTATORS_SECTION, DISAMBIGUATORS_SECTION, NORMALIZATION_SECTION)
__optional_tools_sections = (OPTIONS_SECTION, SEARCH_SECTION, ANNOTATORS_SECTION, DISAMBIGUATORS_SECTION, NORMALIZATION_SECTION)
__expected_tools_sections = (OPTIONS_SECTION, SEARCH_SECTION, ANNOTATORS_SECTION, LINKERS_SECTION, DISAMBIGUATORS_SECTION, NORMALIZATION_SECTION)
__optional_tools_sections = (OPTIONS_SECTION, SEARCH_SECTION, ANNOTATORS_SECTION, LINKERS_SECTION, DISAMBIGUATORS_SECTION, NORMALIZATION_SECTION)

# special relation types for marking which spans can overlap
# ENTITY_NESTING_TYPE used up to version 1.3, now deprecated
Expand Down Expand Up @@ -711,6 +712,7 @@ def get_visual_configs(directory):
OPTIONS_SECTION : [],
SEARCH_SECTION : [TypeHierarchyNode(["google"], ["<URL>:http://www.google.com/search?q=%s"])],
ANNOTATORS_SECTION : [],
LINKERS_SECTION : [],
DISAMBIGUATORS_SECTION : [],
NORMALIZATION_SECTION : [],
}
Expand Down Expand Up @@ -781,6 +783,9 @@ def get_search_config(directory):
def get_annotator_config(directory):
return get_tools_configs(directory)[0][ANNOTATORS_SECTION]

def get_linker_config(directory):
    """Return the LINKERS_SECTION entry of the tools configuration for `directory`."""
    # element 0 of the get_tools_configs() result is indexed by section name
    tools_by_section = get_tools_configs(directory)[0]
    return tools_by_section[LINKERS_SECTION]

def get_disambiguator_config(directory):
return get_tools_configs(directory)[0][DISAMBIGUATORS_SECTION]

Expand Down Expand Up @@ -875,7 +880,14 @@ def get_annotator_config_list(directory):
if directory not in cache:
cache[directory] = __type_hierarchy_to_list(get_annotator_config(directory))
return cache[directory]
get_annotator_config_list.__cache = {}
get_annotator_config_list.__cache = {}

def get_linker_config_list(directory):
    """Return the linker configuration of `directory` converted to a list.

    The converted list is memoised per directory in the function attribute
    `__cache`, so the conversion runs at most once for each directory.
    """
    memo = get_linker_config_list.__cache
    if directory in memo:
        return memo[directory]
    linker_list = __type_hierarchy_to_list(get_linker_config(directory))
    memo[directory] = linker_list
    return linker_list
# per-directory memo used by get_linker_config_list
get_linker_config_list.__cache = {}

def get_disambiguator_config_list(directory):
cache = get_disambiguator_config_list.__cache
Expand Down Expand Up @@ -1536,6 +1548,10 @@ def get_annotator_config(self):
tool_list = get_annotator_config_list(self.directory)
return self._get_tool_config(tool_list)

def get_linker_config(self):
    """Return the linker tool configuration for this project's directory.

    The linker list for self.directory is passed through
    self._get_tool_config(), the same helper get_annotator_config() uses.
    """
    return self._get_tool_config(get_linker_config_list(self.directory))

def get_normalization_config(self):
norm_list = get_normalization_config_list(self.directory)
norm_config = []
Expand Down
111 changes: 107 additions & 4 deletions server/src/tag.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,12 @@
from socket import error as SocketError
from urlparse import urlparse

from annotation import TextAnnotations, TextBoundAnnotationWithText
from annotation import TextAnnotations, TextBoundAnnotationWithText, BinaryRelationAnnotation
from annotation import NormalizationAnnotation
from annotator import _json_from_ann, ModificationTracker
from common import ProtocolError
from document import real_directory
from jsonwrap import loads
from jsonwrap import loads, dumps
from message import Messager
from projectconfig import ProjectConfiguration

Expand Down Expand Up @@ -87,6 +87,9 @@ def _is_textbound(ann):
def _is_normalization(ann):
return 'target' in ann

def _is_relation(ann):
return 'rel_type' in ann

def tag(collection, document, tagger):
pconf = ProjectConfiguration(real_directory(collection))
for tagger_token, _, _, tagger_service_url in pconf.get_annotator_config():
Expand Down Expand Up @@ -157,8 +160,7 @@ def tag(collection, document, tagger):
mods = ModificationTracker()
cidmap = {}

for cid, ann in ((i, a) for i, a in json_resp.iteritems()
if _is_textbound(a)):
for cid, ann in ((i, a) for i, a in json_resp.iteritems() if _is_textbound(a)):
assert 'offsets' in ann, 'Tagger response lacks offsets'
offsets = ann['offsets']
assert 'type' in ann, 'Tagger response lacks type'
Expand Down Expand Up @@ -202,6 +204,107 @@ def tag(collection, document, tagger):
mod_resp['annotations'] = _json_from_ann(ann_obj)
return mod_resp


def link(collection, document, tagger):
    """Run a configured relation tagger ("linker") on a document.

    The linker whose token equals `tagger` is looked up in the project's
    linker configuration. The document text and its entities (one
    "id<TAB>type start end<TAB>text" line each) are POSTed as JSON to the
    linker's service URL. Every entry of the JSON response that carries a
    'rel_type' is added to the document as a new binary relation between
    'arg1' and 'arg2'.

    Arguments:
    collection -- collection path, resolved with real_directory()
    document -- document name within the collection
    tagger -- token of the linker to use

    Returns the modification tracker's JSON response with an additional
    'annotations' entry holding the updated document annotations.

    Raises:
    UnknownTaggerError -- no configured linker has the token `tagger`
    InvalidConnectionSchemeError -- service URL is neither http nor https
    TaggerConnectionError -- socket error or non-200 response status
    InvalidTaggerResponseError -- response is not JSON, is not a JSON
        object, or a relation entry lacks 'arg1' or 'arg2'
    """
    pconf = ProjectConfiguration(real_directory(collection))
    for linker_token, _, _, linker_service_url in pconf.get_linker_config():
        if tagger == linker_token:
            break
    else:
        raise UnknownTaggerError(tagger)

    with TextAnnotations(path_join(real_directory(collection), document)) as ann_obj:

        url_soup = urlparse(linker_service_url)

        if url_soup.scheme == 'http':
            Connection = HTTPConnection
        elif url_soup.scheme == 'https':
            # Delayed HTTPS import since it relies on SSL which is commonly
            # missing if you roll your own Python, for once we should not
            # fail early since tagging is currently an edge case and we
            # can't allow it to bring down the whole server.
            from httplib import HTTPSConnection
            Connection = HTTPSConnection
        else:
            raise InvalidConnectionSchemeError(linker_token, url_soup.scheme)

        conn = None
        try:
            conn = Connection(url_soup.netloc)
            req_headers = {
                # The request body is produced by dumps(), i.e. it is JSON,
                # so it must not be labelled as plain text
                'Content-type': 'application/json; charset=utf-8',
                'Accept': 'application/json',
            }
            # Build a new service URL since the request method doesn't accept
            # a parameters argument
            service_url = url_soup.path + (
                '?' + url_soup.query if url_soup.query else '')
            try:
                entities = list()
                for entity in ann_obj.get_entities():
                    entities.append("{}\t{} {} {}\t{}\n".format(
                        entity.id,
                        entity.type,
                        entity.start,
                        entity.end,
                        entity.text.encode('utf-8')
                    ))
                data = {
                    'document': ann_obj.get_document_text().encode('utf-8'),
                    'entities': entities
                }
                # Note: Trout slapping for anyone sending Unicode objects here
                conn.request('POST',
                             # As per: http://bugs.python.org/issue11898
                             # Force the url to be an ascii string
                             str(service_url),
                             dumps(data),
                             headers=req_headers)
            except SocketError as sock_err:
                raise TaggerConnectionError(linker_token, sock_err)
            resp = conn.getresponse()

            # Did the request succeed?
            if resp.status != 200:
                raise TaggerConnectionError(linker_token,
                                            '%s %s' % (resp.status, resp.reason))
            # Finally, we can read the response data
            resp_data = resp.read()
        finally:
            if conn is not None:
                conn.close()

        try:
            json_resp = loads(resp_data)
        except ValueError:
            raise InvalidTaggerResponseError(linker_token, resp_data)

        # The entries are iterated as key/value pairs below, so anything
        # other than a JSON object is an invalid response
        if not isinstance(json_resp, dict):
            raise InvalidTaggerResponseError(linker_token, resp_data)

        # Validate every relation entry before touching the document, so a
        # malformed entry cannot leave a partially applied response behind.
        # Explicit checks (not asserts) because this is external input and
        # asserts are removed when Python runs with -O.
        relations = []
        for ann in json_resp.itervalues():
            if not (isinstance(ann, dict) and _is_relation(ann)):
                continue
            if 'arg1' not in ann or 'arg2' not in ann:
                raise InvalidTaggerResponseError(linker_token, resp_data)
            relations.append((ann['rel_type'], ann['arg1'], ann['arg2']))

        mods = ModificationTracker()

        for rel_type, arg1, arg2 in relations:
            _id = ann_obj.get_new_id('R')
            rel = BinaryRelationAnnotation(_id, rel_type, 'Arg1', arg1, 'Arg2', arg2, "")
            mods.addition(rel)
            ann_obj.add_annotation(rel)

        mod_resp = mods.json_response()
        mod_resp['annotations'] = _json_from_ann(ann_obj)
        return mod_resp

if __name__ == '__main__':
# Silly test, but helps
tag('/BioNLP-ST_2011_ID_devel', 'PMC1874608-01-INTRODUCTION', 'random')