/
models.py
193 lines (144 loc) · 5.88 KB
/
models.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
# coding: utf-8
"""
sickle.models
~~~~~~~~~~~~~
Collects classes for OAI-specific entities.
:copyright: Copyright 2015 Mathias Loesch
"""
from lxml import etree
from .utils import get_namespace, xml_to_dict
from ._compat import PY3
class ResumptionToken(object):
    """Plain value holder for the fields of a resumption token.

    Every argument is stored unchanged on the attribute of the same name.

    :param token: The token value; also shown in the ``repr``.
    :param cursor: Stored as ``cursor``.
    :param complete_list_size: Stored as ``complete_list_size``.
    :param expiration_date: Stored as ``expiration_date``.
    """

    def __init__(self, token='', cursor='', complete_list_size='',
                 expiration_date=''):
        self.token, self.cursor = token, cursor
        self.complete_list_size = complete_list_size
        self.expiration_date = expiration_date

    def __repr__(self):
        template = '<ResumptionToken %s>'
        return template % self.token
class OAIItem(object):
    """Common base for the OAI entity wrappers in this module.

    Keeps the parsed XML, the namespace-stripping preference and the value
    returned by :func:`get_namespace` for that XML, and offers unicode/bytes
    serializations of the XML.

    :param xml: XML representation of the entity.
    :param strip_ns: Flag for whether to remove the namespaces from the
                     element names in the dictionary representation.
    """

    def __init__(self, xml, strip_ns=True):
        super(OAIItem, self).__init__()
        self._strip_ns = strip_ns
        #: The original parsed XML
        self.xml = xml
        # Computed once here; subclasses prepend it to tag names in lookups.
        self._oai_namespace = get_namespace(xml)

    def __unicode__(self):
        """Serialize the wrapped XML to a unicode string."""
        return etree.tounicode(self.xml)

    def __bytes__(self):
        """Serialize the wrapped XML to UTF-8 encoded bytes."""
        serialized = etree.tounicode(self.xml)
        return serialized.encode("utf8")

    def __str__(self):
        """Return text on Python 3 and UTF-8 bytes otherwise."""
        if PY3:
            return self.__unicode__()
        return self.__bytes__()

    @property
    def raw(self):
        """The original XML as unicode."""
        return etree.tounicode(self.xml)
class Identify(OAIItem):
    """Represents an Identify container.

    Unlike the other entities, this object is built from a
    :class:`sickle.response.OAIResponse` rather than from an XML element.
    Each entry of the dictionary representation is also exposed as an
    attribute (dashes in the key become underscores), holding the first
    value listed for that key.

    :param identify_response: The response for an Identify request.
    :type identify_response: :class:`sickle.OAIResponse`
    """

    def __init__(self, identify_response):
        super(Identify, self).__init__(identify_response.xml, strip_ns=True)
        # Narrow the stored XML from the whole response to the Identify node.
        identify_path = './/' + self._oai_namespace + 'Identify'
        self.xml = self.xml.find(identify_path)
        self._identify_dict = xml_to_dict(self.xml, strip_ns=True)
        for key, values in self._identify_dict.items():
            attr_name = key.replace('-', '_')
            setattr(self, attr_name, values[0])

    def __repr__(self):
        return '<Identify>'

    def __iter__(self):
        """Iterate over the ``(key, values)`` pairs of the Identify data."""
        if PY3:
            return iter(self._identify_dict.items())
        return self._identify_dict.iteritems()
class Header(OAIItem):
    """Represents an OAI Header.

    Exposes ``identifier`` and ``datestamp`` (text of the corresponding
    child elements), ``setSpecs`` (text of every ``setSpec`` child) and
    ``deleted`` (whether the ``status`` attribute equals ``'deleted'``).

    :param header_element: The XML element 'header'.
    :type header_element: :class:`lxml.etree._Element`
    """

    def __init__(self, header_element):
        super(Header, self).__init__(header_element, strip_ns=True)
        ns = self._oai_namespace
        status = self.xml.attrib.get('status')
        self.deleted = status == 'deleted'
        self.identifier = self.xml.find(ns + 'identifier').text
        self.datestamp = self.xml.find(ns + 'datestamp').text
        set_specs = []
        for spec_element in self.xml.findall(ns + 'setSpec'):
            set_specs.append(spec_element.text)
        self.setSpecs = set_specs

    def __repr__(self):
        if not self.deleted:
            return '<Header %s>' % self.identifier
        return '<Header %s [deleted]>' % self.identifier

    def __iter__(self):
        """Iterate over ``(name, value)`` pairs of the header fields."""
        pairs = [
            ('identifier', self.identifier),
            ('datestamp', self.datestamp),
            ('setSpecs', self.setSpecs),
        ]
        return iter(pairs)
class Record(OAIItem):
    """Represents an OAI record.

    The record's header is always parsed into ``header``. The ``metadata``
    attribute is only set for records whose header is not marked as deleted.

    :param record_element: The XML element 'record'.
    :type record_element: :class:`lxml.etree._Element`
    :param strip_ns: Flag for whether to remove the namespaces from the
                     element names.
    """

    def __init__(self, record_element, strip_ns=True):
        super(Record, self).__init__(record_element, strip_ns=strip_ns)
        self.header = Header(self.xml.find(
            './/' + self._oai_namespace + 'header'))
        self.deleted = self.header.deleted
        if not self.deleted:
            # We want to get record/metadata/<container>/*
            # <container> would be the element ``dc``
            # in the ``oai_dc`` case.
            metadata_element = self.xml.find(
                './/' + self._oai_namespace + 'metadata')
            # Index the element directly instead of calling the deprecated
            # ``getchildren()``; both yield the first child element.
            self.metadata = xml_to_dict(
                metadata_element[0], strip_ns=self._strip_ns)

    def __repr__(self):
        if self.header.deleted:
            return '<Record %s [deleted]>' % self.header.identifier
        else:
            return '<Record %s>' % self.header.identifier

    def __iter__(self):
        """Iterate over the ``(key, values)`` pairs of the metadata.

        Deleted records have no ``metadata`` attribute; for those an empty
        iterator is returned instead of raising ``AttributeError``.
        """
        metadata = getattr(self, 'metadata', None)
        if metadata is None:
            return iter(())
        return iter(metadata.items()) if PY3 else metadata.iteritems()
class Set(OAIItem):
    """Represents an OAI set.

    Each entry of the dictionary representation is also exposed as an
    attribute (dashes in the key become underscores), holding the first
    value listed for that key.

    :param set_element: The XML element 'set'.
    :type set_element: :class:`lxml.etree._Element`
    """

    def __init__(self, set_element):
        super(Set, self).__init__(set_element, strip_ns=True)
        self._set_dict = xml_to_dict(self.xml, strip_ns=True)
        for k, v in self._set_dict.items():
            setattr(self, k.replace('-', '_'), v[0])

    def __repr__(self):
        if PY3:
            # ``__repr__`` must return ``str`` on Python 3, and a bytes
            # template cannot be %-formatted with a ``str`` argument there.
            return '<Set %s>' % self.setName
        # Python 2: keep the original byte-string template.
        return u'<Set %s>'.encode('utf8') % self.setName

    def __iter__(self):
        """Iterate over the ``(key, values)`` pairs of the set data."""
        return iter(self._set_dict.items()) if PY3 else \
            self._set_dict.iteritems()
class MetadataFormat(OAIItem):
    """Represents an OAI MetadataFormat.

    Each entry of the dictionary representation is also exposed as an
    attribute (dashes in the key become underscores), holding the first
    value listed for that key.

    :param mdf_element: The XML element 'metadataFormat'.
    :type mdf_element: :class:`lxml.etree._Element`
    """

    def __init__(self, mdf_element):
        super(MetadataFormat, self).__init__(mdf_element, strip_ns=True)
        # Dictionary representation of the element; its entries (including
        # the ``metadataPrefix`` used by ``__repr__``) become attributes.
        self._mdf_dict = xml_to_dict(self.xml, strip_ns=True)
        for k, v in self._mdf_dict.items():
            setattr(self, k.replace('-', '_'), v[0])

    def __repr__(self):
        if PY3:
            # ``__repr__`` must return ``str`` on Python 3, and a bytes
            # template cannot be %-formatted with a ``str`` argument there.
            return '<MetadataFormat %s>' % self.metadataPrefix
        # Python 2: keep the original byte-string template.
        return u'<MetadataFormat %s>'.encode('utf8') % self.metadataPrefix

    def __iter__(self):
        """Iterate over the ``(key, values)`` pairs of the format data."""
        return iter(self._mdf_dict.items()) if PY3 else \
            self._mdf_dict.iteritems()