-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathparse_wa_log.py
More file actions
203 lines (164 loc) · 8.26 KB
/
parse_wa_log.py
File metadata and controls
203 lines (164 loc) · 8.26 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
# Parses a Web Adaptor 11.x log file into a csv
# "C:\Program Files\ArcGIS\Server\framework\runtime\ArcGIS\bin\Python\Scripts\propy.bat" "parse_wa_log.py"
# Copyright 2025 Esri
#
# Licensed under the Apache License Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import os
import datetime
import csv
from urllib.parse import urlparse
import re
def main(argv=None):
    """Convert one Web Adaptor log file into a csv file next to it.

    Args:
        argv: Optional command-line style argument list. ``argv[0]`` is
            treated as the script name; when ``argv[1]`` is present it is
            used as the path of the log file to parse. When omitted, the
            built-in default path below is used.

    Returns:
        None. The path of the csv that was written is printed to stdout.
    """
    # default input, used when no path is supplied on the command line
    defaultLogFileName = r'C:\temp\scdnr\22July\webadaptor20240722.log'
    if argv is not None and len(argv) > 1:
        logFileName = argv[1]
    else:
        logFileName = defaultLogFileName
    # create output csv with same name
    csvFileName = createCsvFileName(logFileName)
    # do the work
    parseWebAdaptorLogFile(logFileName, csvFileName)
    print('File output: ' + csvFileName)
# create a matching csv name for the logfile
def createCsvFileName(logFileName):
    """Build the csv output path that sits beside the given log file.

    A trailing ``.log`` extension (any letter case) is swapped for ``.csv``.
    Any other name gets ``.csv`` appended, so the returned path never equals
    the input path and the log can never be overwritten by its own output.

    Args:
        logFileName: Path of the log file.

    Returns:
        The csv path, in the same directory as the log file.
    """
    thePath = os.path.dirname(logFileName)
    shortLogFileName = os.path.basename(logFileName)
    root, extension = os.path.splitext(shortLogFileName)
    if extension.lower() == '.log':
        # only the final extension is replaced; '.log' elsewhere in the name is kept
        csvFileBaseName = root + '.csv'
    else:
        csvFileBaseName = shortLogFileName + '.csv'
    return os.path.join(thePath, csvFileBaseName)
# this is the status code that Web Adaptor returns to IIS
def extractFrontEndStatusCodeFromFinishedRecord(message):
    """Extract the three-digit status code that follows a '- ' in the message.

    The code is only reported when the message contains exactly one
    '- ddd' occurrence; zero or several occurrences are treated as
    "no unambiguous status code".

    Args:
        message: The message text of one log record.

    Returns:
        The three digits as a string, or '' when there is no single match
        or the message is not text.
    """
    try:
        # raw string so that \d reaches the regex engine untouched
        matchList = re.findall(r'- (\d{3})', message)
    except TypeError:
        # message was not a str (for example None)
        return ''
    if len(matchList) == 1:
        return matchList[0]
    return ''
# this is the status code that Web Adaptor receives from the backend
def extractBackEndStatusCodeFromFinishedRecord(message):
    """Extract the text that follows 'ms -' in an 'End processing' message.

    Only messages containing 'End processing HTTP request after' are
    considered. The value returned is everything after the first 'ms -'
    that follows that phrase, with surrounding whitespace removed.

    Args:
        message: The message text of one log record.

    Returns:
        The status text as a string, or '' when the phrase or the 'ms -'
        marker is absent or the message is not text.
    """
    phrase = 'End processing HTTP request after'
    marker = 'ms -'
    try:
        startOfPhrase = message.find(phrase)
    except (AttributeError, TypeError):
        # message was not a str (for example None)
        return ''
    if startOfPhrase == -1:
        return ''
    # look for the marker after the phrase so earlier text cannot match
    endOfTiming = message.find(marker, startOfPhrase)
    if endOfTiming == -1:
        # without the marker there is nothing reliable to slice out
        return ''
    return message[endOfTiming + len(marker):].strip()
# pulls out any url (received from the front or sent to the back)
def extractUrlFromFinishedRecord(message):
    """Parse the first 'https://' url found in the message.

    The url runs from 'https://' up to the next space, or to the end of
    the message when no space follows it.

    Args:
        message: The message text of one log record.

    Returns:
        The result of ``urlparse`` for that url, or None when the message
        holds no 'https://' url, the url cannot be parsed, or the message
        is not text.
    """
    try:
        startOfUrl = message.find('https://')
    except (AttributeError, TypeError):
        # message was not a str (for example None)
        return None
    if startOfUrl == -1:
        return None
    endOfUrl = message.find(' ', startOfUrl)
    if endOfUrl == -1:
        # url is the last token; keep its final character
        endOfUrl = len(message)
    try:
        return urlparse(message[startOfUrl:endOfUrl])
    except ValueError:
        # urlparse rejects some malformed hosts (e.g. an unclosed '[')
        return None
# parses information from the "message" portion so it can be normalized for querying purposes
def finalProcessingOfRecord(lastProcessedRecord):
    """Add the fields derived from a record's message to the record.

    The dictionary is updated in place with 'frontstatuscode',
    'backstatuscode', 'targethost', 'targetpath' and 'targetquery'
    (in that order) and is also returned.

    Args:
        lastProcessedRecord: Record dictionary holding a 'message' entry.

    Returns:
        The same dictionary, with the derived entries added.
    """
    text = lastProcessedRecord['message']
    # status code found in the message for the front end, '' if absent
    lastProcessedRecord['frontstatuscode'] = extractFrontEndStatusCodeFromFinishedRecord(text)
    # status code found in the message for the back end, '' if absent
    lastProcessedRecord['backstatuscode'] = extractBackEndStatusCodeFromFinishedRecord(text)
    # url components, blank when the message carries no url
    parsedUrl = extractUrlFromFinishedRecord(text)
    if parsedUrl is None:
        host, path, query = '', '', ''
    else:
        host, path, query = parsedUrl.netloc, parsedUrl.path, parsedUrl.query
    lastProcessedRecord['targethost'] = host
    lastProcessedRecord['targetpath'] = path
    lastProcessedRecord['targetquery'] = query
    return lastProcessedRecord
# do the log reading, parsing, and csv writing
def parseWebAdaptorLogFile(logFileName, csvFileName):
    """Read a Web Adaptor log file and write one csv row per log record.

    A record starts on a line that begins with a 'YYYY-MM-DD' date followed
    by 'T'. Lines that do not start that way are appended to the message of
    the record in progress. Every record, including the last one, goes
    through finalProcessingOfRecord before it is written, so all rows carry
    the same columns. The csv header is written with the first record; an
    empty log produces an empty csv.

    Args:
        logFileName: Path of the log file to read (UTF-8, optional BOM).
        csvFileName: Path of the csv file to create or overwrite (UTF-8).

    Raises:
        ValueError: If both paths refer to the same file, because opening
            the csv for writing would erase the log.
    """
    samePath = (os.path.normcase(os.path.abspath(logFileName))
                == os.path.normcase(os.path.abspath(csvFileName)))
    if samePath:
        raise ValueError('csv output would overwrite the log file: ' + str(logFileName))

    # the log is opened first so a missing log does not leave an empty csv behind;
    # utf-8-sig reads plain UTF-8 and also drops a leading BOM if there is one
    with open(logFileName, 'r', encoding="utf-8-sig") as logFile, \
            open(csvFileName, 'w', newline="", encoding="UTF-8") as csvFile:
        writer = None  # created together with the header on the first record
        count = 0
        lastProcessedRecord = {}  # the record currently being accumulated
        # iterate the file directly instead of loading every line into memory
        for count, line in enumerate(logFile, start=1):
            endOfDate = _findEndOfLeadingDate(line)
            if endOfDate != -1:
                # a new record starts here, so the accumulated one is complete
                if lastProcessedRecord:
                    writer = _writeRecord(csvFile, writer, lastProcessedRecord)
                    # note progress to stdout
                    if count % 1000 == 0:
                        print('Record: ' + str(count) + ' written')
                lastProcessedRecord = _startRecord(line, endOfDate, count)
            elif lastProcessedRecord:
                # continuation line: extend the message of the current record
                lastProcessedRecord['message'] = (
                    lastProcessedRecord['message'] + ' ' + _flattenLine(line))
            else:
                # text before the first dated line: keep it as a record with
                # blank header fields rather than failing or dropping it
                lastProcessedRecord = {
                    'lineNumber': str(count),
                    'date': '',
                    'time': '',
                    'datetime': '',
                    'zone': '',
                    'type': '',
                    'module': '',
                    'message': _flattenLine(line),
                }
        # write the last record, which no following dated line has flushed
        if lastProcessedRecord:
            writer = _writeRecord(csvFile, writer, lastProcessedRecord)
        print('Final record: ' + str(count) + ' written')


def _findEndOfLeadingDate(line):
    """Return the index of the 'T' ending a leading 'YYYY-MM-DD' date, or -1."""
    endOfDate = line.find('T')
    if endOfDate == -1:
        return -1
    try:
        datetime.datetime.strptime(line[0:endOfDate], '%Y-%m-%d')
    except ValueError:
        return -1
    return endOfDate


def _flattenLine(line):
    """Replace line-break characters with spaces so a record stays on one csv row."""
    return line.replace('\n', ' ').replace('\r', ' ')


def _startRecord(line, endOfDate, count):
    """Split the first line of a record into its fields and return them as a dict."""
    theDate = line[0:endOfDate]
    # the time is taken by fixed width, based on the observed length of the time text
    endOfTime = endOfDate + 17
    theTime = line[(endOfDate + 1):endOfTime]
    startOfType = line.find(' [', endOfTime)
    theZone = line[endOfTime:startOfType]
    endOfType = line.find('] ', startOfType)
    theType = line[(startOfType + 2):endOfType]
    startOfModule = line.find(' (', endOfType)
    endOfModule = line.find(') ', startOfModule)
    theModule = line[(startOfModule + 2):endOfModule]
    theMessage = line[(endOfModule + 1):]
    return {
        'lineNumber': str(count),
        'date': theDate,
        'time': theTime,
        'datetime': theDate + ' ' + theTime,
        'zone': theZone,
        'type': theType,
        'module': theModule,
        'message': _flattenLine(theMessage),
    }


def _writeRecord(csvFile, writer, record):
    """Finalize a record and write it, creating the writer and header if needed."""
    record = finalProcessingOfRecord(record)
    if writer is None:
        # the first record defines the column order of the csv
        writer = csv.DictWriter(csvFile, list(record))
        writer.writeheader()
    writer.writerow(record)
    return writer
# script entry point: run main with the command-line arguments and use its
# return value as the process exit status
if __name__ == "__main__":
    exitStatus = main(sys.argv)
    sys.exit(exitStatus)