-
Notifications
You must be signed in to change notification settings - Fork 0
/
parse.py
38 lines (38 loc) · 930 Bytes
/
parse.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
from __future__ import print_function
import codecs
import csv,sys
import heapq
import time
import os
import re
import math
import operator
import threading
from collections import *
# Count how often each query string occurs in the AOL user click-through
# collection and write the rare and the frequent queries to two text files.
#
# Queries seen fewer than TAIL_BELOW times go to "tail.txt"; queries seen at
# least HEAD_FROM times go to "head.txt"; everything in between is dropped.
TAIL_BELOW = 50
HEAD_FROM = 500


def collection_paths():
    """Return the ten input file paths, numbered 01 through 10."""
    return [
        "./AOL-user-ct-collection/user-ct-test-collection-%02d.txt" % number
        for number in range(1, 11)
    ]


def count_queries(paths):
    """Count occurrences of each query across the given tab-separated files.

    Each line is split on tabs; field 1 is used as the query and field 2 as
    its timestamp.  A row is counted only when its (query, timestamp) pair
    differs from the pair on the immediately preceding row, so consecutive
    rows repeating the same query at the same time count once.

    The "previous row" state deliberately carries over from one file to the
    next, as the files are processed as a single stream.

    Lines with fewer than three tab-separated fields (e.g. blank lines) are
    skipped instead of raising IndexError.

    NOTE(review): if the data files start with a column-header row, that row
    is counted like any other query -- confirm against the data set.

    Args:
        paths: iterable of file paths to read, in order.

    Returns:
        A defaultdict(int) mapping query string -> number of occurrences.
    """
    counts = defaultdict(int)
    prev_query = ''
    prev_time = ''
    for path in paths:
        with open(path) as log:
            for line in log:
                # Strip the line terminator so the timestamp compares equal
                # whether or not further fields follow it on the row.
                fields = line.rstrip('\r\n').split('\t')
                if len(fields) < 3:
                    continue
                query, stamp = fields[1], fields[2]
                if query != prev_query or stamp != prev_time:
                    counts[query] += 1
                prev_query = query
                # The original assigned to a misspelled name here, so the
                # timestamp half of the duplicate check never took effect.
                prev_time = stamp
    return counts


def write_buckets(counts, tail_path="tail.txt", head_path="head.txt"):
    """Write "<query> <count>" lines for rare and frequent queries.

    Args:
        counts: mapping of query -> occurrence count.
        tail_path: output file for queries with count < TAIL_BELOW.
        head_path: output file for queries with count >= HEAD_FROM.
    """
    # Context managers guarantee both files are flushed and closed.
    with open(tail_path, 'w') as tail, open(head_path, 'w') as head:
        for query in counts:
            seen = counts[query]
            entry = str(query) + " " + str(seen)
            if seen < TAIL_BELOW:
                print(entry, file=tail)
            elif seen >= HEAD_FROM:
                print(entry, file=head)


def main():
    """Run the full job: read every collection file, then write both outputs."""
    write_buckets(count_queries(collection_paths()))


if __name__ == "__main__":
    main()