-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathLambda.py
More file actions
93 lines (89 loc) · 3.87 KB
/
Lambda.py
File metadata and controls
93 lines (89 loc) · 3.87 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import bisect
import datetime
import hashlib
import json
import os
import re

import boto3
def _hms_fields(text):
    """Return the (hours, minutes, seconds) integers at the start of *text*.

    The fields are read from fixed columns 0-1, 3-4 and 6-7, so any single
    character may separate them and trailing text is ignored.

    Raises:
        ValueError: a field is missing or not numeric.
        TypeError: *text* is not sliceable (for example ``None``).
    """
    return int(text[0:2]), int(text[3:5]), int(text[6:8])


def _to_seconds(text):
    """Return the leading HH:MM:SS of *text* as seconds since midnight."""
    hours, minutes, seconds = _hms_fields(text)
    return hours * 3600 + minutes * 60 + seconds


def _minute_of_day(text):
    """Return the leading HH:MM of *text* as minutes since midnight."""
    return int(text[0:2]) * 60 + int(text[3:5])


class _LineSeconds:
    """Read-only sequence view exposing each log line as its time in seconds.

    Timestamps are parsed on access, so ``bisect`` only parses the few lines
    it actually probes instead of the whole file.
    """

    def __init__(self, lines):
        self._lines = lines

    def __len__(self):
        return len(self._lines)

    def __getitem__(self, index):
        return _to_seconds(self._lines[index])


def _read_inputs(event):
    """Return the raw ``(T, dT)`` strings from the query string or JSON body.

    Query string parameters win when present and non-empty; otherwise the
    request body is parsed as JSON exactly once.
    """
    params = event.get('queryStringParameters')
    if not params:
        params = json.loads(event['body'])
    return params['T'], params['dT']


def _fetch_text(s3, byte_range=None):
    """Download the configured log object (optionally a byte range) as text."""
    request = {'Bucket': os.environ['s3bucket'], 'Key': os.environ['filepath']}
    if byte_range is not None:
        request['Range'] = byte_range
    return s3.get_object(**request)['Body'].read().decode('utf-8')


def _split_lines(text):
    """Split *text* on newlines, dropping carriage returns and trailing blanks."""
    lines = text.replace("\r", "").split("\n")
    # Only strip the empty tail left by a final newline; a file without a
    # trailing newline keeps its real last line.
    while lines and not lines[-1]:
        lines.pop()
    return lines


def _json_response(status, message):
    """Build a response dict whose body is *message* encoded as JSON."""
    return {'statusCode': status, 'body': json.dumps(message)}


def lambda_handler(event, context):
    """Return MD5 hashes of matching log lines inside the window T +/- dT.

    ``T`` and ``dT`` (both ``HH:MM:SS``) are read from
    ``event['queryStringParameters']`` or, failing that, from the JSON in
    ``event['body']``. The log object is located through the ``s3bucket`` and
    ``filepath`` environment variables; ``headrange`` and ``tailrange`` give
    the byte ranges used to peek at the first and last log line, and
    ``pattern`` is the regular expression a line must match to be reported.

    NOTE(review): log lines are assumed to start with ``HH:MM:SS`` and to be
    sorted by time; the binary search depends on it.

    Returns:
        A dict with ``statusCode`` and ``body``:
        * 200 - body is the comma-joined MD5 hex digests (possibly empty).
        * 404 - the requested window is not covered by the log file.
        * 400 - ``T``/``dT`` are missing or not parseable.
    """
    # Validate the request before touching the environment or S3 so that a
    # bad request is answered cheaply instead of crashing the invocation.
    try:
        t, dt = _read_inputs(event)
        t_hours, t_minutes, t_seconds = _hms_fields(t)
        d_hours, d_minutes, d_seconds = _hms_fields(dt)
    except (KeyError, TypeError, ValueError):
        return _json_response(400, 'T and dT must be supplied as HH:MM:SS')
    if not (0 <= t_hours <= 23 and 0 <= t_minutes <= 59):
        return _json_response(400, 'T and dT must be supplied as HH:MM:SS')

    pattern = re.compile(os.environ['pattern'])
    s3 = boto3.client('s3')

    # Peek at the first and last log line to learn the time span of the file.
    head_lines = _split_lines(_fetch_text(s3, os.environ['headrange']))
    tail_lines = _split_lines(_fetch_text(s3, os.environ['tailrange']))
    if not head_lines or not tail_lines:
        print('The time window does not exist in this log file')
        return _json_response(404, 'The time window does not exist in this log file')

    # The coverage check works at minute granularity (seconds are ignored).
    # Plain integers are used so a window reaching before 00:00 or past 23:59
    # is simply "not covered" rather than overflowing or wrapping around.
    firstlog = _minute_of_day(head_lines[0])
    lastlog = _minute_of_day(tail_lines[-1])
    centre_minute = t_hours * 60 + t_minutes
    delta_minutes = d_hours * 60 + d_minutes
    low = centre_minute - delta_minutes
    high = centre_minute + delta_minutes
    print('firstlog=', firstlog)
    print('lastlog=', lastlog)
    print('low=', low)
    print('high=', high)

    if not (firstlog <= low and lastlog >= high):
        print('The time window does not exist in this log file')
        return _json_response(404, 'The time window does not exist in this log file')

    print('trying to get md5 hash')
    lines = _split_lines(_fetch_text(s3))
    centre_seconds = t_hours * 3600 + t_minutes * 60 + t_seconds
    delta_seconds = d_hours * 3600 + d_minutes * 60 + d_seconds
    stamps = _LineSeconds(lines)
    # bisect_left / bisect_right give the exact inclusive window even when the
    # boundary timestamps are absent or repeated in the log.
    start = bisect.bisect_left(stamps, centre_seconds - delta_seconds)
    stop = bisect.bisect_right(stamps, centre_seconds + delta_seconds)

    print('getting our messages')
    result = [
        hashlib.md5(line.encode('utf-8')).hexdigest()
        for line in lines[start:stop]
        if pattern.search(line)
    ]
    print('result=', result)
    return {
        'statusCode': 200,
        'body': ",".join(result)
    }