Skip to content

Commit

Permalink
Added generic avro file read method
Browse files: browse the repository at this point in the history
  • Loading branch information
miballe committed Oct 12, 2024
1 parent fc30b5b commit c465ad4
Showing 1 changed file with 21 additions and 3 deletions.
24 changes: 21 additions & 3 deletions src/factiva/analytics/integration/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
class SnapshotFiles(object):


def read_file(self, filepath, only_stats=False, merge_body=False) -> pd.DataFrame:
def read_avro_file(self, filepath, only_stats=False, merge_body=False) -> pd.DataFrame:
"""Reads a single Dow Jones snapshot datafile
Parameters
----------
Expand Down Expand Up @@ -49,7 +49,7 @@ def read_file(self, filepath, only_stats=False, merge_body=False) -> pd.DataFram
return r_df


def read_folder(self, folderpath, file_format='AVRO', only_stats=False, merge_body=False) -> pd.DataFrame:
def read_avro_folder(self, folderpath, file_format='AVRO', only_stats=False, merge_body=False) -> pd.DataFrame:
"""Scans a folder and reads the content of all files matching the format (file_format)
Parameters
----------
Expand All @@ -72,6 +72,24 @@ def read_folder(self, folderpath, file_format='AVRO', only_stats=False, merge_bo
r_df = pd.DataFrame()
for filename in os.listdir(folderpath):
if filename.lower().endswith("." + format_suffix):
t_df = self.read_file(folderpath + "/" + filename, only_stats, merge_body)
t_df = self.read_avro_file(folderpath + "/" + filename, only_stats, merge_body)
r_df = pd.concat([r_df, t_df])
return r_df

def read_raw_avro(self, filepath) -> pd.DataFrame:
    """Load the records of a generic AVRO file into a Pandas DataFrame.

    Parameters
    ----------
    filepath : str
        Relative or absolute path of the AVRO file to read.

    Returns
    -------
    pandas.DataFrame
        A single DataFrame built from all records read from the file.
    """
    # AVRO is a binary format, so the file is opened in "rb" mode and is
    # fully consumed before the handle is closed.
    with open(filepath, "rb") as avro_file:
        rows = list(fastavro.reader(avro_file))

    return pd.DataFrame.from_records(rows)

0 comments on commit c465ad4

Please sign in to comment.