Skip to content

Commit

Permalink
Merge pull request #159 from seasidesparrow/master
Browse files Browse the repository at this point in the history
bump feedparser to 6.0.8
  • Loading branch information
seasidesparrow committed Nov 5, 2021
2 parents 6c82045 + d6b2a8e commit eee8cf3
Show file tree
Hide file tree
Showing 4 changed files with 51 additions and 10 deletions.
17 changes: 17 additions & 0 deletions examples/ex_arxiv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#!/usr/bin/env python
import os
import sys
from pyingest.parsers.atel import ATelParser
from pyingest.serializers.classic import Tagged

# Example: fetch the Astronomer's Telegram RSS feed and append each parsed
# item to 'atel.tag' using the classic Tagged serializer.
rss_url = 'http://www.astronomerstelegram.org/?adsbiblio'

parser = ATelParser()
documents = parser.parse(rss_url, data_tag='item')

# Open in append mode so existing content of 'atel.tag' is kept. The context
# manager closes the file even when serializing a record raises, which the
# previous explicit open()/close() pair did not guarantee.
with open('atel.tag', 'a') as outputfp:
    for d in documents:
        serializer = Tagged()
        serializer.write(d, outputfp)
40 changes: 32 additions & 8 deletions pyingest/parsers/proquest.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,8 +88,13 @@ def parse(self):
record = next(reader)

# ProQuest ID (001)
proqid = record['001'].value()
pubnr = proqid.replace('AAI', '')
try:
proqid = record['001'].value()
except Exception as err:
print('unable to get proquest id! %s ' % err)
else:
print('I am processing ProQuest ID# %s' % proqid)
pubnr = proqid.replace('AAI', '')

# MARC 2.1 fixed length data elements (005)
flde = record['005'].value()
Expand Down Expand Up @@ -209,12 +214,30 @@ def parse(self):
url = url_base % pubnr
properties['ELECTR'] = url

output_metadata['source'] = datasource
output_metadata['authors'] = author
output_metadata['affiliations'] = [affil]
output_metadata['title'] = title
output_metadata['abstract'] = abstract
output_metadata['publication'] = '; '.join(jfield)
try:
output_metadata['source'] = datasource
except:
print('datasource missing')
try:
output_metadata['authors'] = author
except:
print('author missing')
try:
output_metadata['affiliations'] = [affil]
except:
print('affil missing')
try:
output_metadata['title'] = title
except:
print('title missing')
try:
output_metadata['abstract'] = abstract
except:
print('abstract missing')
try:
output_metadata['publication'] = '; '.join(jfield)
except:
print('jfield missing')
if pubdate:
output_metadata['pubdate'] = "%s" % pubdate
if databases:
Expand All @@ -233,6 +256,7 @@ def parse(self):
except Exception as err:
print("Record skipped, MARC parsing failed: %s" % err)
else:
print("Record processed successfully.")
self.results.append(output_metadata)

return
2 changes: 1 addition & 1 deletion pyingest/serializers/classic.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ class Tagged(object):
def write(cls, record, fp=sys.stdout):
for field in cls.fieldDict:
content = record.get(field)
if field is 'bibcode' and content is None:
if field == 'bibcode' and content == None:
continue
elif not content:
continue
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
bs4==0.0.1
feedparser==5.2.1
feedparser==6.0.8
future==0.18.2
habanero==0.7.4
jsonschema==3.1.1
Expand Down

0 comments on commit eee8cf3

Please sign in to comment.