-
Notifications
You must be signed in to change notification settings - Fork 0
/
get_data.py
32 lines (25 loc) · 878 Bytes
/
get_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
""" Retrieve all data from DynamoDB and save to a local CSV file. """
import json
import boto3
import pandas as pd
client = boto3.client('dynamodb')

# Upper bound on items evaluated per scan request, passed as Scan's `Limit`.
LIMIT = 1000000000

# Resume from the checkpoint left by a previous run: the pagination key of the
# last saved page plus the records already written to the CSV.
# Both files must already exist; a missing file raises FileNotFoundError.
with open('last_key.txt', 'r') as key_file:
    last_key = json.load(key_file)

df = pd.read_csv('tweets.csv')
tweets = df.to_dict('records')

res = client.scan(TableName='iot-tweets', Limit=LIMIT, ExclusiveStartKey=last_key)
tweets.extend(res['Items'])

i = 0
# A response carries 'LastEvaluatedKey' only while more pages remain.
while 'LastEvaluatedKey' in res:
    i += 1
    res = client.scan(TableName='iot-tweets',
                      ExclusiveStartKey=res['LastEvaluatedKey'],
                      Limit=LIMIT)
    tweets.extend(res['Items'])
    print(len(tweets))

    # Checkpoint every 10 pages. Skip it when this was the final page: there
    # is no 'LastEvaluatedKey' to record then (reading it would raise
    # KeyError), and the save after the loop writes the CSV anyway.
    if i % 10 == 0 and 'LastEvaluatedKey' in res:
        df = pd.DataFrame(tweets)
        df.to_csv('tweets.csv', index=None)
        # Close the file right away so the key is flushed to disk before the
        # next (possibly failing) scan request.
        with open('last_key.txt', 'w') as key_file:
            json.dump(res['LastEvaluatedKey'], key_file)
        print('saved')

# Final save once the scan has reached the end of the table.
# NOTE(review): last_key.txt is not updated here, so it still holds the most
# recent checkpoint key after a complete run -- confirm whether re-running
# from that key is intended.
df = pd.DataFrame(tweets)
df.to_csv('tweets.csv', index=None)