Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Xref affil.20240312 #97

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
24 changes: 24 additions & 0 deletions adsingestp/parsers/crossref.py
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In this example, affiliations are specified by ROR (Research Organization Registry) identifier rather than as an explicit affiliation string. Is there an easy way to capture and convert these?

Expand Up @@ -265,6 +265,30 @@ def _parse_contrib(self):
affil = [a.get_text() for a in c.find_all("affiliation")]
if affil:
contrib_tmp["aff"] = affil
elif c.find("affiliations"):
affil = []
institutions = c.find("affiliations").find_all("institution")
if institutions:
for inst in institutions:
name = inst.find("institution_name")
dept = inst.find("institution_department")
acro = inst.find("institution_acronym")
place = inst.find("institution_place")
taglist = []
if dept:
taglist.append(dept.get_text())
if name:
taglist.append(name.get_text())
if acro:
taglist.append(acro.get_text())
if place:
taglist.append(place.get_text())
if taglist:
affstring = ", ".join(taglist)
affstring = re.sub(r"\s+,", ",", affstring)
affil.append(affstring)
if affil:
contrib_tmp["aff"] = affil

role = c.get("contributor_role", "unknown")

Expand Down
57 changes: 50 additions & 7 deletions tests/stubdata/output/crossref_cn_10.1093=mnras=stac2975.json
Expand Up @@ -41,19 +41,37 @@
"name": {
"surname": "Yang",
"given_name": "Haifeng"
}
},
"affiliation": [
{
"affPubRaw": "School of Computer Science and Technology, Taiyuan University of Science and Technology, Taiyuan 030024, China"
}
]
},
{
"name": {
"surname": "Shi",
"given_name": "Chenhui"
}
},
"affiliation": [
{
"affPubRaw": "School of Computer Science and Technology, Taiyuan University of Science and Technology, Taiyuan 030024, China"
}
]
},
{
"name": {
"surname": "Cai",
"given_name": "Jianghui"
},
"affiliation": [
{
"affPubRaw": "School of Computer Science and Technology, Taiyuan University of Science and Technology, Taiyuan 030024, China"
},
{
"affPubRaw": "School of Computer Science and Technology, North University of China, Taiyuan 030051, China"
}
],
"attrib": {
"orcid": "0000-0001-6945-8093"
}
Expand All @@ -62,31 +80,56 @@
"name": {
"surname": "Zhou",
"given_name": "Lichan"
}
},
"affiliation": [
{
"affPubRaw": "School of Computer Science and Technology, Taiyuan University of Science and Technology, Taiyuan 030024, China"
}
]
},
{
"name": {
"surname": "Yang",
"given_name": "Yuqing"
}
},
"affiliation": [
{
"affPubRaw": "School of Computer Science and Technology, Taiyuan University of Science and Technology, Taiyuan 030024, China"
}
]
},
{
"name": {
"surname": "Zhao",
"given_name": "Xujun"
}
},
"affiliation": [
{
"affPubRaw": "School of Computer Science and Technology, Taiyuan University of Science and Technology, Taiyuan 030024, China"
}
]
},
{
"name": {
"surname": "He",
"given_name": "Yanting"
}
},
"affiliation": [
{
"affPubRaw": "School of Computer Science and Technology, Taiyuan University of Science and Technology, Taiyuan 030024, China"
}
]
},
{
"name": {
"surname": "Hao",
"given_name": "Jing"
}
},
"affiliation": [
{
"affPubRaw": "School of Computer Science and Technology, Taiyuan University of Science and Technology, Taiyuan 030024, China"
}
]
}
],
"title": {
Expand Down
56 changes: 50 additions & 6 deletions tests/stubdata/output/crossref_cn_10.1093=pasj=psac053.json
Expand Up @@ -42,6 +42,11 @@
"surname": "Shimoda",
"given_name": "Jiro"
},
"affiliation": [
{
"affPubRaw": "Department of Physics, Graduate School of Science, Nagoya University, Furo-cho, Chikusa-ku, Nagoya, Aichi 464-8602, Japan"
}
],
"attrib": {
"orcid": "0000-0003-3383-2279"
}
Expand All @@ -50,37 +55,76 @@
"name": {
"surname": "Ohira",
"given_name": "Yutaka"
}
},
"affiliation": [
{
"affPubRaw": "Department of Earth and Planetary Science, The University of Tokyo, 7-3-1 Hongo, Bunkyo-ku, Tokyo 113-0033, Japan"
}
]
},
{
"name": {
"surname": "Bamba",
"given_name": "Aya"
}
},
"affiliation": [
{
"affPubRaw": "Department of Physics, Graduate School of Science, the University of Tokyo, 7-3-1 Hongo, Bunkyo-ku, Tokyo 113-0033, Japan"
},
{
"affPubRaw": "Research Center for the Early Universe, School of Science, The University of Tokyo, 7-3-1 Hongo, Bunkyo-ku, Tokyo 113-0033, Japan"
}
]
},
{
"name": {
"surname": "Terada",
"given_name": "Yukikatsu"
}
},
"affiliation": [
{
"affPubRaw": "Graduate School of Science and Engineering, Saitama University, 255 Shimo-Ohkubo, Saitama, Saitama 338-8570, Japan"
},
{
"affPubRaw": "Institute of Space and Astronautical Science, Japan Aerospace Exploration Agency, 3-1-1 Yoshinodai, Chuo, Sagamihara, Kanagawa 252-5210, Japan"
}
]
},
{
"name": {
"surname": "Yamazaki",
"given_name": "Ryo"
}
},
"affiliation": [
{
"affPubRaw": "Department of Physical Sciences, Aoyama Gakuin University, 5-10-1 Fuchinobe, Chuo, Sagamihara, Kanagawa 252-5258, Japan"
},
{
"affPubRaw": "Institute of Laser Engineering, Osaka University, 2-6 Yamadaoka, Suita, Osaka 565-0871, Japan"
}
]
},
{
"name": {
"surname": "Inoue",
"given_name": "Tsuyoshi"
}
},
"affiliation": [
{
"affPubRaw": "Department of Physics, Konan University, 8-9-1 Okamoto, Higashinada-ku, Kobe, Hyogo 658-8501, Japan"
}
]
},
{
"name": {
"surname": "Tanaka",
"given_name": "Shuta J"
}
},
"affiliation": [
{
"affPubRaw": "Department of Physical Sciences, Aoyama Gakuin University, 5-10-1 Fuchinobe, Chuo, Sagamihara, Kanagawa 252-5258, Japan"
}
]
}
],
"title": {
Expand Down