Skip to content

Commit 4e50236

Browse files
committed
Initial commit of version 1.1
0 parents  commit 4e50236

File tree

10 files changed

+472
-0
lines changed

10 files changed

+472
-0
lines changed

README.md

Whitespace-only changes.

xmlutils/bin/xmlkvrecursive.py

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
# Copyright (C) 2010 Splunk Inc. All Rights Reserved. Version 4.0
2+
import sys,splunk.Intersplunk
3+
import re
4+
import urllib
5+
import xml.sax
6+
import xml.sax.saxutils as saxutils
7+
from xml.sax.handler import ContentHandler
8+
from xml.sax.handler import EntityResolver
9+
from xml.sax.xmlreader import InputSource
10+
import StringIO
11+
import types
12+
13+
class NullInputSource(InputSource):
    """Input source whose byte stream is a fixed placeholder message."""

    def getByteStream(self):
        """Return a fresh in-memory stream holding the placeholder text."""
        placeholder = "entity files not supported."
        stream = StringIO.StringIO(placeholder)
        return stream
16+
17+
class NullEntityResolver(EntityResolver):
    """Entity resolver that answers every request with a NullInputSource."""

    def resolveEntity(self, publicId, systemId):
        """Ignore both identifiers and hand back a placeholder input source."""
        placeholder_source = NullInputSource()
        return placeholder_source
20+
21+
class XmlHandler(ContentHandler):
    """SAX handler that turns an XML document into flat field/value pairs.

    A field name is the chain of element names from the root down to the
    current element, joined with ``_``.  Attribute values are stored under
    the element's key followed by ``-<attribute name>``.  When a key receives
    more than one value the values are collected into a list of strings and
    the marker field ``'multi values'`` is set to ``'yep'``.

    If ``flatten`` is true, an element whose key has already been used is
    renamed ``name[2]``, ``name[3]``, ... so that each occurrence gets a key
    of its own instead of piling up in one multi-valued field.
    """

    def __init__(self, flatten):
        """Create a handler; ``flatten`` selects per-occurrence keys."""
        ContentHandler.__init__(self)
        self.flatten = flatten
        # Start from a clean state so the handler is usable even when the
        # caller forgets to call reset() before the first parse.
        self.reset()

    def reset(self):
        """Forget everything collected so far; call before each document."""
        self.key_prefix = []     # element names from the root to the current element
        self.keys_seen = set()   # every joined key already produced (membership only)
        self.new_fields = {}     # extracted field name -> str or list of str

    def getNewFields(self):
        """Return the dict of fields collected since the last reset()."""
        return self.new_fields

    def setValue(self, value, suffix=''):
        """Record ``value`` under the current element key plus ``suffix``.

        Values that are empty after stripping whitespace are ignored.
        """
        dest_key = '_'.join(self.key_prefix) + suffix
        text = str(value)
        if not text.strip():
            return

        if dest_key not in self.new_fields:
            # First value for this key: keep it as a simple string.
            self.new_fields[dest_key] = text
            return

        # Second or later value: flag it and make sure the slot is a list.
        self.new_fields['multi values'] = 'yep'
        if not isinstance(self.new_fields[dest_key], list):
            self.new_fields[dest_key] = [self.new_fields[dest_key]]
        self.new_fields[dest_key].append(text)

    def startElement(self, name, attrs):
        """Push ``name`` onto the key path and record its attributes."""
        self.key_prefix.append(name)

        # With flatten enabled, a key that was already used gets a numbered
        # name.  Use the instance setting, not a module-level global.
        if self.flatten and '_'.join(self.key_prefix) in self.keys_seen:
            self.key_prefix.pop()
            parent_key = '_'.join(self.key_prefix)
            count = 2
            new_name = name + '[' + str(count) + ']'
            while parent_key + '_' + new_name in self.keys_seen:
                count += 1
                new_name = name + '[' + str(count) + ']'
            self.key_prefix.append(new_name)

        self.keys_seen.add('_'.join(self.key_prefix))

        for k in attrs.getNames():
            self.setValue(attrs.getValue(k), "-" + k)

    def characters(self, content):
        """Record non-blank character data under the current element key.

        NOTE(review): a SAX parser may deliver one text node in several
        calls; each chunk is then recorded as a separate value.
        """
        # Compare by value: identity comparison against '' is unreliable.
        if content is not None and content.strip() != '':
            self.setValue(content.strip())

    def endElement(self, name):
        """Pop the element that just closed off the key path."""
        self.key_prefix.pop()
74+
75+
76+
# Script body: read the incoming search results, extract fields from the XML
# found in each result, and write the enriched results back to Splunk.
try:
    results, dummyresults, settings = splunk.Intersplunk.getOrganizedResults()

    keywords, argvals = splunk.Intersplunk.getKeywordsAndOptions()

    # flatten=true|1|yes gives repeated elements numbered keys of their own.
    flatten = argvals.get("flatten", "False").strip().lower() in ('true', '1', 'yes')

    handler = XmlHandler(flatten)

    for r in results:
        try:
            if 'xml' in r:
                # An explicit "xml" field takes precedence over the raw event.
                xml_text = r['xml']
            else:
                raw = r["_raw"]
                # Keep only the span from the first '<' to the last '>'.
                xml_text = raw[raw.index('<'):raw.rindex('>') + 1]

            handler.reset()

            parser = xml.sax.make_parser()
            parser.setContentHandler(handler)
            parser.setEntityResolver(NullEntityResolver())
            parser.parse(StringIO.StringIO(xml_text))

            for k, v in handler.getNewFields().items():
                r[k] = v
        except Exception:
            # Report the failure on this event only and keep processing.
            # Exception (not a bare except) lets SystemExit and
            # KeyboardInterrupt propagate.
            import traceback
            stack = traceback.format_exc()
            # .get(): an event without _raw must not abort the whole run.
            r['_raw'] = "Failed to parse: " + str(stack) + "\n" + r.get('_raw', '')

except Exception:
    import traceback
    stack = traceback.format_exc()
    results = splunk.Intersplunk.generateErrorResults("Error : Traceback: " + str(stack))

splunk.Intersplunk.outputResults(results)

xmlutils/bin/xmlprettyprint.py

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
# Copyright (C) 2010 Splunk Inc. All Rights Reserved. Version 4.0
2+
import sys,splunk.Intersplunk
3+
import xml.sax
4+
import xml.sax.saxutils as saxutils
5+
from xml.sax.handler import ContentHandler
6+
from xml.sax.handler import EntityResolver
7+
from xml.sax.xmlreader import InputSource
8+
import StringIO
9+
10+
class NullInputSource(InputSource):
    """Input source that always yields the same placeholder text."""

    def getByteStream(self):
        """Build and return an in-memory stream with the placeholder text."""
        message = "entity files not supported."
        byte_stream = StringIO.StringIO(message)
        return byte_stream
13+
14+
class NullEntityResolver(EntityResolver):
    """Resolver that returns a NullInputSource for any requested entity."""

    def resolveEntity(self, publicId, systemId):
        """Return a placeholder source regardless of the identifiers given."""
        source = NullInputSource()
        return source
17+
18+
class XmlHandler(ContentHandler):
    """SAX handler that re-serializes a document with indentation.

    Every start tag is written on a new line, indented one space per nesting
    level.  An element that contains no child elements is closed on the same
    line; any other element gets its end tag on a line of its own.
    Whitespace-only character data is dropped.
    """

    def __init__(self):
        ContentHandler.__init__(self)
        # Initialise all state so getOutput() works before the first reset().
        self.reset()

    def reset(self, r=None):
        """Clear the output buffer before parsing a new document.

        ``r`` is accepted for backward compatibility and is not used.
        """
        self.current_output = ''
        self.indent = 0
        self.open_tag = ''   # name of the element opened last, '' if none pending

    def getOutput(self):
        """Return the pretty-printed text built since the last reset()."""
        return self.current_output

    def startElement(self, name, attrs):
        """Write the start tag of ``name`` on a new, indented line."""
        self.open_tag = name
        pieces = ['\n', ' ' * self.indent, '<', name]
        self.indent += 1

        for k in attrs.getNames():
            pieces.append(' ' + k + '=' + saxutils.quoteattr(attrs.getValue(k)))
        pieces.append('>')
        self.current_output += ''.join(pieces)

    def characters(self, content):
        """Append escaped character data, skipping whitespace-only chunks."""
        if content.strip():
            self.current_output += saxutils.escape(content)

    def endElement(self, name):
        """Write the end tag, inline for childless elements."""
        self.indent -= 1
        if self.open_tag != name:
            # Child elements were written since this element opened, so the
            # end tag goes on its own line at the element's indent level.
            self.current_output += '\n' + ' ' * self.indent
        self.current_output += '</' + name + '>'
        # Clear the marker so a parent with the same name as the child that
        # just closed is not mistaken for a childless element.
        self.open_tag = ''
51+
52+
53+
# Script body: pretty-print the XML found in each search result and store
# the formatted text back in the field it was read from.
try:
    results, dummyresults, settings = splunk.Intersplunk.getOrganizedResults()

    handler = XmlHandler()

    for r in results:
        try:
            if 'xml' in r:
                # An explicit "xml" field takes precedence over the raw event.
                xml_text = r['xml']
                dest_field = 'xml'
            else:
                raw = r["_raw"]
                dest_field = '_raw'
                # Keep only the span from the first '<' to the last '>'.
                xml_text = raw[raw.index('<'):raw.rindex('>') + 1]

            handler.reset(xml_text)
            parser = xml.sax.make_parser()
            parser.setContentHandler(handler)
            parser.setEntityResolver(NullEntityResolver())
            parser.parse(StringIO.StringIO(xml_text))

            r[dest_field] = handler.getOutput()

        except Exception:
            # Report the failure on this event only and keep processing.
            # Exception (not a bare except) lets SystemExit and
            # KeyboardInterrupt propagate.
            import traceback
            stack = traceback.format_exc()
            # .get(): an event without _raw must not abort the whole run.
            r['_raw'] = "Failed to parse: " + str(stack) + "\n" + r.get('_raw', '')

except Exception:
    import traceback
    stack = traceback.format_exc()
    results = splunk.Intersplunk.generateErrorResults("Error : Traceback: " + str(stack))

splunk.Intersplunk.outputResults(results)

xmlutils/bin/xmlsplit.py

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
# Copyright (C) 2010 Splunk Inc. All Rights Reserved. Version 4.0
2+
import sys,splunk.Intersplunk
3+
import re
4+
import xml.sax
5+
import xml.sax.saxutils as saxutils
6+
from xml.sax.handler import ContentHandler
7+
from xml.sax.handler import EntityResolver
8+
from xml.sax.xmlreader import InputSource
9+
import copy
10+
import StringIO
11+
12+
13+
class NullInputSource(InputSource):
    """Input source backed by a constant placeholder message."""

    def getByteStream(self):
        """Return a new in-memory stream that contains the placeholder text."""
        text = "entity files not supported."
        result = StringIO.StringIO(text)
        return result
16+
17+
class NullEntityResolver(EntityResolver):
    """Resolver that maps every entity request to a NullInputSource."""

    def resolveEntity(self, publicId, systemId):
        """Disregard the identifiers and return a placeholder input source."""
        replacement = NullInputSource()
        return replacement
20+
21+
class XmlHandler(ContentHandler):
    """SAX handler that emits one result row per ``<field>`` element.

    While parsing, the handler re-serializes the document into a buffer.  The
    buffer restarts whenever an element named ``field`` opens; when that
    element closes, a deep copy of the source row is appended to the result
    list with ``_raw`` replaced by the serialized element.
    """

    def __init__(self, field):
        """Create a handler that splits on elements named ``field``."""
        ContentHandler.__init__(self)
        self.field = field
        # Give every attribute a defined value before the first reset().
        self.current_output = ''
        self.newResults = []
        self.row = None

    def reset(self, newResults, row=None):
        """Prepare for a new document.

        newResults -- list that receives the generated rows.
        row        -- result (dict) copied for every generated row.  When
                      omitted, the handler falls back to the module-level
                      name ``r``, which callers that do not pass ``row``
                      must provide.
        """
        self.current_output = ''
        self.newResults = newResults
        self.row = row

    def startElement(self, name, attrs):
        """Serialize the start tag, restarting the buffer at ``field``."""
        if name == self.field:
            self.current_output = ''
        pieces = ['<', name]

        for k in attrs.getNames():
            pieces.append(' ' + k + '=' + saxutils.quoteattr(attrs.getValue(k)))
        pieces.append('>')
        self.current_output += ''.join(pieces)

    def characters(self, content):
        """Append escaped character data to the buffer."""
        self.current_output += saxutils.escape(content)

    def endElement(self, name):
        """Serialize the end tag and emit a row when ``field`` closes."""
        self.current_output += '</' + name + '>'
        if name != self.field:
            return

        # Emit only when the buffer really starts with this element's start
        # tag; it does not after a nested element of the same name has
        # already flushed the buffer.  A plain prefix test is used so that
        # regex metacharacters in the field name (e.g. '.') cannot mis-match.
        if self.current_output.startswith(('<' + name + ' ', '<' + name + '>')):
            template = self.row if self.row is not None else r
            newRow = copy.deepcopy(template)
            newRow['_raw'] = self.current_output
            self.newResults.append(newRow)
        self.current_output = ''
50+
51+
# Script body: split the XML of every search result into one new result per
# occurrence of the element named by the field=<name> option.
try:
    results, dummyresults, settings = splunk.Intersplunk.getOrganizedResults()

    keywords, argvals = splunk.Intersplunk.getKeywordsAndOptions()

    field = argvals.get("field", None)
    if field is None:
        raise Exception("Must supply name of field in field=fieldName")

    newResults = []

    handler = XmlHandler(field)

    for r in results:
        try:
            if 'xml' in r:
                # An explicit "xml" field takes precedence over the raw event.
                xml_text = r['xml']
            else:
                raw = r["_raw"]
                # Keep only the span from the first '<' to the last '>'.
                xml_text = raw[raw.index('<'):raw.rindex('>') + 1]

            handler.reset(newResults)
            parser = xml.sax.make_parser()
            parser.setContentHandler(handler)
            parser.setEntityResolver(NullEntityResolver())
            parser.parse(StringIO.StringIO(xml_text))
        except Exception:
            # Exception (not a bare except) lets SystemExit and
            # KeyboardInterrupt propagate.
            import traceback
            stack = traceback.format_exc()
            # .get(): an event without _raw must not abort the whole run.
            r['_raw'] = "Failed to parse: " + str(stack) + r.get('_raw', '')
            # Append the failed event; rebinding the list here would throw
            # away every row already produced from earlier events.
            newResults.append(r)

except Exception:
    import traceback
    stack = traceback.format_exc()
    newResults = splunk.Intersplunk.generateErrorResults("Error : Traceback: " + str(stack))

splunk.Intersplunk.outputResults(newResults)
89+
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# Copyright (C) 2010 Splunk Inc. All Rights Reserved. Version 4.0
2+
import splunk.Intersplunk
3+
4+
5+
# Script body: remove a leading "<?...?>" declaration from the XML of each
# search result and store the remainder back in the field it was read from.
try:
    results, dummyresults, settings = splunk.Intersplunk.getOrganizedResults()

    for r in results:
        try:
            if 'xml' in r:
                # An explicit "xml" field takes precedence over the raw event.
                xml_text = r['xml']
                dest_field = 'xml'
            else:
                raw = r["_raw"]
                dest_field = '_raw'
                # Keep only the span from the first '<' to the last '>'.
                xml_text = raw[raw.index('<'):raw.rindex('>') + 1]

            if xml_text.startswith('<?'):
                # remove the xml declaration. I know, I know, but I ran into a
                # case where charset was wrong, and the parser explodes.
                # Offsets must be computed on xml_text itself: it is already
                # a slice, so positions taken from the raw event would be
                # shifted by whatever text precedes the XML.
                next_tag = xml_text.find('<', 2)
                if next_tag != -1:
                    xml_text = xml_text[next_tag:]

            r[dest_field] = xml_text

        except Exception:
            # Report the failure on this event only and keep processing.
            # Exception (not a bare except) lets SystemExit and
            # KeyboardInterrupt propagate.
            import traceback
            stack = traceback.format_exc()
            # .get(): an event without _raw must not abort the whole run.
            r['_raw'] = "Failed to parse: " + str(stack) + r.get('_raw', '')

except Exception:
    import traceback
    stack = traceback.format_exc()
    results = splunk.Intersplunk.generateErrorResults("Error : Traceback: " + str(stack))

splunk.Intersplunk.outputResults(results)

xmlutils/default/app.conf

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
[launcher]
2+
version = 1.1
3+
author = vbumgarner
4+
description = XML utilities
5+
6+
[package]
7+
id = xmlutils
8+
9+
[install]
10+
state = enabled
11+
build = 2
12+
13+
[ui]
14+
is_visible = false
15+
is_manageable = false
16+
label = xmlutils
17+
18+

xmlutils/default/commands.conf

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
[xmlkvrecursive]
2+
filename = xmlkvrecursive.py
3+
retainsevents = true
4+
overrides_timeorder = false
5+
streaming = true
6+
7+
[xmlsplit]
8+
filename = xmlsplit.py
9+
retainsevents = true
10+
overrides_timeorder = false
11+
run_in_preview = false
12+
streaming = true
13+
14+
[xmlprettyprint]
15+
filename = xmlprettyprint.py
16+
retainsevents = true
17+
overrides_timeorder = false
18+
run_in_preview = false
19+
streaming = true
20+
21+
[xmlstripdeclaration]
22+
filename = xmlstripdeclaration.py
23+
retainsevents = true
24+
overrides_timeorder = false
25+
run_in_preview = false
26+
streaming = true
27+

0 commit comments

Comments
 (0)