Skip to content

Commit c39e9c8

Browse files
authored
Merge pull request #173 from ruby-docx/add-header-extraction
Add ability to read document headers
2 parents 6c2aa2f + efcaf8c commit c39e9c8

3 files changed

Lines changed: 23 additions & 1 deletion

File tree

lib/docx/document.rb

Lines changed: 11 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,7 @@ module Docx
2222
class Document
2323
include Docx::SimpleInspect
2424

25-
attr_reader :xml, :doc, :zip, :styles
25+
attr_reader :xml, :doc, :zip, :styles, :headers
2626

2727
def initialize(path_or_io, options = {})
2828
@replace = {}
@@ -40,6 +40,7 @@ def initialize(path_or_io, options = {})
4040
@document_xml = document.get_input_stream.read
4141
@doc = Nokogiri::XML(@document_xml)
4242
load_styles
43+
load_headers
4344
yield(self) if block_given?
4445
ensure
4546
@zip.close unless @zip.nil?
@@ -200,6 +201,15 @@ def with_zip64_disabled
200201
Zip.write_zip64_support = previous
201202
end
202203

204+
# Parses every header part of the opened archive and stores the result in
# @headers.
#
# Each entry matching "word/header*.xml" is read from @zip and parsed with
# Nokogiri::XML. The resulting Hash is keyed by the entry name with the
# leading "word/" and the trailing ".xml" removed (for example "header1").
# An archive without any matching entry yields an empty Hash.
#
# @return [Hash] the Hash that was assigned to @headers
def load_headers
  @headers = @zip.glob("word/header*.xml").each_with_object({}) do |entry, headers|
    part_name = entry.name
    # \A and \z anchor to the whole entry name; ^ and $ are line anchors in
    # Ruby and would also match at an embedded line break.
    key = part_name.sub(/\Aword\//, "").sub(/\.xml\z/, "")
    headers[key] = Nokogiri::XML(@zip.read(part_name))
  end
end
212+
203213
def load_styles
204214
@styles_xml = @zip.read('word/styles.xml')
205215
@styles = Nokogiri::XML(@styles_xml)

spec/docx/document_spec.rb

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -60,6 +60,18 @@
6060
end
6161
end
6262

63+
# Checks that the header parts of a .docx file are exposed through
# Document#headers, keyed by their part name.
describe 'read headers' do
  before { @doc = Docx::Document.open(@fixtures_path + '/multi_doc.docx') }

  it 'can extract headers' do
    headers = @doc.headers

    expect(headers).not_to be_nil
    expect(headers.keys).to eq(['header1'])
    expect(headers['header1'].text).to eq('Hello from the header.')
  end
end
74+
6375
describe 'read tables' do
6476
before do
6577
@doc = Docx::Document.open(@fixtures_path + '/tables.docx')

spec/fixtures/multi_doc.docx

6.13 KB
Binary file not shown.

0 commit comments

Comments
 (0)