diff --git a/.github/workflows/lint_python.yml b/.github/workflows/lint_python.yml index 90e74bd..bf91650 100644 --- a/.github/workflows/lint_python.yml +++ b/.github/workflows/lint_python.yml @@ -9,7 +9,7 @@ jobs: timeout-minutes: 10 strategy: matrix: - python-version: [3.6, 3.7, 3.8] + python-version: [3.7, 3.8, 3.9] steps: - uses: actions/checkout@v2 diff --git a/CHANGELOG.md b/CHANGELOG.md index f6820a7..14241d2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,9 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/) and this project follows [Semantic Versioning](http://semver.org/). ## [Unreleased] +### Added +- Export methods to library: `exports` to list all exports and `module_export`/`module_item_export` to export multiple or a single item +- New example with a flattened result dict ## [0.3.0] - 2021-10-27 ### Changed diff --git a/examples/export_from_museumplus.py b/examples/export_from_museumplus.py new file mode 100644 index 0000000..cd08c31 --- /dev/null +++ b/examples/export_from_museumplus.py @@ -0,0 +1,50 @@ +import museumpy +import requests +from dotenv import load_dotenv, find_dotenv +import os + +load_dotenv(find_dotenv()) +user = os.getenv('MP_USER') +pw = os.getenv('MP_PASS') +s = requests.Session() +s.auth = (user, pw) + +client = museumpy.MuseumPlusClient( + base_url='https://mpzurichrietberg.zetcom.com/MpWeb-mpZurichRietberg', + session=s +) + +exports = client.exports(module='ObjectGroup') + +# find a csv export +csv_export_id = None +for export in exports: + if export['extension'] == 'csv': + csv_export_id = export['id'] + break + +print(f"CSV-Export ID: {csv_export_id}") + +# export all +all_csv_path = client.module_export(csv_export_id, module='ObjectGroup') +print(f"Full CSV export: {all_csv_path}") + +# export with filter +filtered_module_export_path = client.module_export( + csv_export_id, + field='OgrNameTxt', + value='Himmelheber-Fotoarchiv 
(Furbo)', + module='ObjectGroup' +) +print(f"Filtered CSV: {filtered_module_export_path}") + +# export single item +result = client.search( + field='OgrNameTxt', + value='Himmelheber-Fotoarchiv (Furbo)', + module='ObjectGroup' +)[0] + +item_id = result['raw']['moduleItem']['id'] +single_item_path = client.module_item_export(item_id, csv_export_id, module='ObjectGroup') +print(f"Single item export: {single_item_path}") diff --git a/examples/flat_dict.py b/examples/flat_dict.py new file mode 100644 index 0000000..aba140a --- /dev/null +++ b/examples/flat_dict.py @@ -0,0 +1,37 @@ +# flake8: noqa +# pip install flatten-dict +import museumpy +import requests +from dotenv import load_dotenv, find_dotenv +from pprint import pprint +from flatten_dict import flatten +import os + + +def flat_dict(record, xml): + d = record['raw'] + def leaf_reducer(k1, k2): + if k1 is None or k2.lower() in k1.lower(): + return k2 + if k2 == "text": + return k1 + return f"{k1}_{k2}" + + flat_data = flatten(d, max_flatten_depth=2, reducer=leaf_reducer) + return flat_data + + +load_dotenv(find_dotenv()) +user = os.getenv('MP_USER') +pw = os.getenv('MP_PASS') +s = requests.Session() +s.auth = (user, pw) + +client = museumpy.MuseumPlusClient( + base_url='https://mpzurichrietberg.zetcom.com/MpWeb-mpZurichRietberg', + map_function=flat_dict, + session=s +) + +records = client.search(field='ObjObjectNumberTxt', value='2019.184') +pprint(records[0]) diff --git a/museumpy/__init__.py b/museumpy/__init__.py index ea125b5..b97677c 100644 --- a/museumpy/__init__.py +++ b/museumpy/__init__.py @@ -16,6 +16,7 @@ def fulltext_search(base_url, query, **kwargs): # noqa c = MuseumPlusClient(**client_kwargs) return c.search(**search_kwargs) + def search(base_url, field, value, **kwargs): # noqa search_params = ['field', 'value', 'module', 'limit', 'offset'] search_kwargs = {k: v for k, v in kwargs.items() if k in search_params} @@ -28,3 +29,15 @@ def search(base_url, field, value, **kwargs): # noqa c = 
MuseumPlusClient(**client_kwargs) return c.search(**search_kwargs) + + +def exports(base_url, **kwargs): + exports_params = ['module'] + export_kwargs = {k: v for k, v in kwargs.items() if k in exports_params} + + # assume all others kwargs are for the client + client_kwargs = {k: v for k, v in kwargs.items() if k not in exports_params} + client_kwargs['base_url'] = base_url + + c = MuseumPlusClient(**client_kwargs) + return c.exports(**export_kwargs) diff --git a/museumpy/client.py b/museumpy/client.py index e9431af..c5b7d24 100644 --- a/museumpy/client.py +++ b/museumpy/client.py @@ -34,6 +34,16 @@ """ # noqa +SEARCH_TEMPLATE_NO_FIELD = """ + + + + + + + +""" # noqa + class MuseumPlusClient(object): def __init__(self, base_url=None, map_function=None, session=None): @@ -52,16 +62,25 @@ def fulltext_search(self, query, module='Object', limit=100, offset=0): data_loader = DataPoster(url, self.session, params, FULLTEXT_TEMPLATE) return response.SearchResponse(data_loader, limit, offset, self.map_function) - def search(self, field, value, module='Object', limit=100, offset=0): + def search(self, field=None, value=None, module='Object', limit=100, offset=0): url = f"{self.base_url}/ria-ws/application/module/{module}/search" params = { 'module_name': module, 'field': field, 'value': value, - } - data_loader = DataPoster(url, self.session, params, SEARCH_TEMPLATE) + } + if not field: + template = SEARCH_TEMPLATE_NO_FIELD + else: + template = SEARCH_TEMPLATE + data_loader = DataPoster(url, self.session, params, template) return response.SearchResponse(data_loader, limit, offset, self.map_function) + def exports(self, module='Object'): + url = f"{self.base_url}/ria-ws/application/module/{module}/export" + data_loader = DataLoader(url, self.session) + return response.ExportResponse(data_loader) + def module_item(self, id, module='Object'): url = f"{self.base_url}/ria-ws/application/module/{module}/{id}" data_loader = DataLoader(url, self.session) @@ -70,6 +89,28 @@ def 
module_item(self, id, module='Object'): return resp[0] return resp + def module_item_export(self, id, export_id, module='Object', dir='.'): + url = f"{self.base_url}/ria-ws/application/module/{module}/{id}/export/{export_id}" + data_loader = DataLoader(url, self.session) + return data_loader.download_file(url, dir) + + def module_export(self, export_id, field=None, value=None, module='Object', limit=100, offset=0, dir='.'): # noqa + url = f"{self.base_url}/ria-ws/application/module/{module}/export/{export_id}" + params = { + 'module_name': module, + 'field': field, + 'value': value, + 'limit': limit, + 'offset': offset, + } + if not field: + template = SEARCH_TEMPLATE_NO_FIELD + else: + template = SEARCH_TEMPLATE + + data_poster = DataPoster(url, self.session, params, template) + return data_poster.download_file(dir=dir) + def download_attachment(self, id, module='Object', dir='.'): url = f"{self.base_url}/ria-ws/application/module/{module}/{id}/attachment" data_loader = DataLoader(url, self.session) @@ -87,12 +128,25 @@ def __init__(self, url, session, params=None, template=None): def load(self, **kwargs): self.params.update(kwargs) xml = self.template.format(**self.params).encode('utf-8') - return self._post_xml(self.url, xml) + res = self._post_xml(self.url, xml) + return self.xmlparser.parse(res.content) + + def download_file(self, dir='.'): + xml = self.template.format(**self.params).encode('utf-8') + res = self._post_xml(self.url, xml) + d = res.headers.get('Content-Disposition') + fname = re.findall("filename=(.+)", d)[0] + assert fname, "Could not find filename in Content-Disposition header" + path = os.path.join(dir, fname) + with open(path, 'wb') as f: + for chunk in res.iter_content(1024): + f.write(chunk) + return path def _post_xml(self, url, xml): headers = {'Content-Type': 'application/xml'} res = self._post_content(url, xml, headers) - return self.xmlparser.parse(res.content) + return res def _post_content(self, url, data, headers={}): 
self.session.headers.update(headers) diff --git a/museumpy/response.py b/museumpy/response.py index af02800..79662e1 100644 --- a/museumpy/response.py +++ b/museumpy/response.py @@ -4,6 +4,7 @@ from . import errors ZETCOM_NS = "http://www.zetcom.com/ria/ws/module" +EXPORT_NS = "http://www.zetcom.com/ria/ws/module/export" class SearchResponse(object): @@ -151,3 +152,54 @@ def _load_new_data(self): raise errors.NoMoreRecordsError("There are no more records") xml = self.data_loader.load(limit=self.limit, offset=self.offset) self._parse_content(xml) + + +class ExportResponse(object): + def __init__(self, data_loader): + self.data_loader = data_loader + self.xmlparser = xmlparse.XMLParser() + self.exports = [] + xml = data_loader.load() + self._parse_content(xml) + + def _parse_content(self, xml): + self._extract_exports(xml) + + def _extract_exports(self, xml): + xml_recs = self.xmlparser.findall(xml, f'.//{{{EXPORT_NS}}}export') # noqa + for xml_rec in xml_recs: + export = self._map_xml(xml_rec) + export['raw'] = self.xmlparser.todict(xml_rec, xml_attribs=True) + self.exports.append(export) + + def _map_xml(self, xml_rec): + def xml_text(xpath): + return self.xmlparser.find(xml_rec, xpath).text + + export = { + 'id': xml_rec.attrib['id'], + 'extension': xml_text(f".//{{{EXPORT_NS}}}extension"), + 'label': xml_text(f".//{{{EXPORT_NS}}}label"), + } + return export + + def __repr__(self): + try: + return ( + 'ExportResponse(' + 'count=%r)' + ) % ( + len(self.exports) + ) + except AttributeError: + return 'ExportResponse(empty)' + + def __len__(self): + return len(self.exports) + + def __iter__(self): + for export in self.exports: + yield export + + def __getitem__(self, key): + return self.exports[key]