Skip to content

Commit

Permalink
Fixed issue with spaces in filenames
Browse files Browse the repository at this point in the history
  • Loading branch information
Thomas Hanke committed Aug 28, 2024
1 parent e16aedf commit 529d3cf
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 8 deletions.
14 changes: 12 additions & 2 deletions annotator.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import re, os
import ast
import json
from urllib.parse import urlparse, unquote
from urllib.parse import urlparse, unquote, quote
from dateutil.parser import parse as date_parse
from csv import Sniffer
import requests
Expand Down Expand Up @@ -337,6 +337,9 @@ def __init__(
self.url, self.encoding, self.authorization
)
self.file_domain = self.url.rsplit(self.file_name, 1)[0]
# use the percent-encoded file name: the URLs built from it below must not contain spaces
self.file_name = quote(self.file_name)

self.meta_file_name = self.file_name.rsplit(".", 1)[0] + "-metadata.json"
self.csv_namespace = self.file_domain + self.file_name + "/"
self.context = [
Expand Down Expand Up @@ -376,9 +379,16 @@ def annotate(self) -> dict:
self.result_dict = self.process_data()
return self.result_dict

def convert(self, format: str) -> str:
def graph(self) -> Graph:
g = Graph()
g.parse(data=json.dumps(self.result_dict), format="json-ld")
# with open("test.json", "w") as f:
# json.dump(self.result_dict, f, indent=4)
return g

def convert(self, format: str) -> str:
g = self.graph()
# g.serialize("test.ttl", format="json-ld")
self.meta_file_name = self.meta_file_name.rsplit(".", 1)[0]
if format in ["turtle", "longturtle"]:
self.meta_file_name += ".ttl"
Expand Down
9 changes: 5 additions & 4 deletions app.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# app.py
import os
import base64
from urllib.parse import urlparse
from urllib.parse import urlparse, quote

import uvicorn
from starlette_wtf import StarletteForm, CSRFProtectMiddleware, csrf_protect
Expand Down Expand Up @@ -355,12 +355,13 @@ async def annotate_upload(
# add prov o documentation
result = {**result, **annotate_prov(request.url._url)}
data = annotator.convert(format=return_type.value)

data_bytes = BytesIO(data.encode())
filename = annotator.meta_file_name
# delete the temp csv file
if os.path.isfile(file.filename):
os.remove(file.filename)
return RDFStreamingResponse(content=data_bytes, filename=filename)
# if os.path.isfile(file.filename):
# os.remove(file.filename)
return RDFStreamingResponse(content=data_bytes, filename=quote(filename))


@app.post("/api/rdf", response_class=RDFStreamingResponse)
Expand Down
4 changes: 2 additions & 2 deletions csvw_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from rdflib.namespace import CSVW, RDF, XSD, PROV, RDFS, DC
from datetime import datetime
from urllib.request import urlopen
from urllib.parse import urlparse, unquote
from urllib.parse import urlparse, unquote, quote
import io, os
import logging
import requests
Expand Down Expand Up @@ -186,7 +186,7 @@ def __init__(
# self.metagraph.serialize('metagraph.ttl')
print("meta_root: " + self.meta_root)
# print('csv_url: '+url)
self.base_url = "{}/".format(str(self.meta_root).rsplit("/", 1)[0])
self.base_url = "{}/".format(quote(str(self.meta_root).rsplit("/", 1)[0]))
parsed_url = urlparse(url)
if parsed_url.scheme in ["https", "http", "file"]:
self.csv_url = url
Expand Down

0 comments on commit 529d3cf

Please sign in to comment.