-
Notifications
You must be signed in to change notification settings - Fork 0
/
uwoa.py
116 lines (106 loc) · 3.6 KB
/
uwoa.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
#! /usr/bin/env python3
import csv
import os
import json
import urllib.request
from collections import Counter
from urllib.parse import quote
from pprint import pprint as ppr
# Unpaywall REST endpoint template; {0} is replaced with the URL-quoted DOI.
# Unpaywall requires the caller's e-mail address as an `email` query
# parameter. Set the UNPAYWALL_EMAIL environment variable to a real address;
# the fallback below is only a placeholder and should be replaced.
UNPAYWALL_EMAIL = os.environ.get('UNPAYWALL_EMAIL', 'you@example.com')
# quote() also escapes any '{' / '}' so the later apiurl.format(doi) call
# cannot trip over the address.
apiurl = ('https://api.unpaywall.org/v2/{0}?email='
          + quote(UNPAYWALL_EMAIL, safe='@'))
# Load both Web of Science exports into a single list of row dicts
# (one dict per CSV row, keyed by the column headers).
wosdata = []
for wos_path in ('data/wos1.csv', 'data/wos2.csv'):
    # newline='' lets the csv module do its own line-ending handling, so
    # quoted fields containing embedded newlines are parsed correctly.
    with open(wos_path, 'rt', newline='') as wos_file:
        rdr = csv.DictReader(wos_file)
        # After the loop this holds the header of the last file read
        # (data/wos2.csv), matching the value the script set originally.
        fieldnames = rdr.fieldnames
        wosdata.extend(rdr)
#for row in wosdata:
# print(row['Source'], row['DOI'])
# Build one flat price list from every APC sheet found in the apc/ directory.
# Each entry is {'File': sheet file name, 'Title': journal title,
# 'USD': price string}. The sheets use different column layouts, so the
# layout is detected from the header row.
EUR_TO_USD = 1.12  # fixed conversion rate applied to EUR-priced rows
pricelist = []
for f in os.listdir('apc'):
    with open(os.path.join('apc', f), 'rt', encoding='Latin-1',
              newline='') as fdata:
        rdr = csv.DictReader(fdata)
        # fieldnames is None for an empty file; treat that as "no columns"
        # so the membership tests below do not raise TypeError.
        columns = rdr.fieldnames or []
        if 'Title' in columns and 'USD' in columns:
            for row in rdr:
                # Rows with an empty title are skipped in this layout only.
                if row['Title'] != '':
                    pricelist.append({'File': f, 'Title': row['Title'],
                                      'USD': row['USD']})
        elif 'Title' in columns and 'APC USD ' in columns:
            # The trailing space in 'APC USD ' is part of the column name.
            for row in rdr:
                pricelist.append({'File': f, 'Title': row['Title'],
                                  'USD': row['APC USD ']})
        elif 'Title' in columns and 'CHF' in columns:
            # NOTE(review): the CHF amount is stored under 'USD' without any
            # currency conversion -- confirm whether that is intended.
            for row in rdr:
                pricelist.append({'File': f, 'Title': row['Title'],
                                  'USD': row['CHF']})
        else:
            # Fallback layout: 'Journal title', 'Price' and 'Currency'.
            for row in rdr:
                currency = row['Currency']
                if currency == 'EUR':
                    USD = str(int(round(float(row['Price']) * EUR_TO_USD)))
                elif currency == 'USD':
                    USD = row['Price']
                else:
                    print('????', currency, '????')
                    # Skip the row: there is no price to record, and falling
                    # through would reuse the previous row's price (or raise
                    # NameError if this is the first row seen).
                    continue
                pricelist.append({'File': f, 'Title': row['Journal title'],
                                  'USD': USD})
# Cross-reference the 2017 Web of Science articles with the APC price list
# and ask Unpaywall for the open-access status of every article whose journal
# has a listed price. Articles that are not 'closed' or 'green' are recorded
# in `output` as {'doi', 'apc', 'type', 'title'} dicts, one per matching
# price-list entry.

# Index the price list by lower-cased journal title so each article is matched
# with a dict lookup instead of a scan over the whole list. Entries sharing a
# title keep their price-list order.
prices_by_title = {}
for price in pricelist:
    prices_by_title.setdefault(price['Title'].lower(), []).append(price)

output = []
for r in wosdata:
    if r['Publication Date'] != '2017':
        continue
    matches = prices_by_title.get(r['Source'].lower())
    # Without a DOI there is nothing to look up.
    if not matches or not r['DOI']:
        continue
    # One request per article, even when several price entries match.
    doi = quote(r['DOI'])
    try:
        resp = urllib.request.urlopen(apiurl.format(doi))
    except Exception:
        # Best effort: a failed lookup (unknown DOI, network error, ...)
        # just skips the article. Unlike a bare `except:`, this still lets
        # KeyboardInterrupt / SystemExit stop the script.
        continue
    with resp:  # make sure the HTTP response is closed
        content = resp.read().decode('utf-8')
    data = json.loads(content)
    if data['oa_status'] in ['closed', 'green']:
        continue
    for price in matches:
        output.append({'doi': r['DOI'],
                       'apc': price['USD'],
                       'type': data['oa_status'],
                       'title': price['Title']})
# Write the collected articles to costs.csv, one row per entry in `output`.
fieldnames = ['doi', 'apc', 'type', 'title']
# newline='' stops the '\r\n' row terminator emitted by the csv writer from
# being translated a second time on platforms that rewrite '\n', which would
# otherwise leave a blank line after every row.
with open('costs.csv', 'wt', newline='') as cost:
    wtr = csv.DictWriter(cost, fieldnames=fieldnames)
    wtr.writeheader()
    wtr.writerows(output)
#fieldnames = ['File', 'Title', 'USD']
#with open('pricelist.csv', 'wt') as pl:
# wtr = csv.DictWriter(pl, fieldnames=fieldnames)
# wtr.writeheader()
# for r in pricelist:
# wtr.writerow(r)
#matched = []
#unmatched = []
#for row in wosdata:
# match = False
# for price in pricelist:
# p = price['Title'].lower()
# s = row['Source'].lower()
# if p == s:
# matched.append(row['Source'])
# match = True
# else:
# if not match:
# unmatched.append(row['Source'])
#print(unmatched)
#print(len(unmatched))
#c = Counter(matched)
#for k, v in c.most_common():
# print('{}\t{}'.format(v, k))
#print(sum(c.values()))