Skip to content
This repository was archived by the owner on Aug 20, 2020. It is now read-only.

Commit cb5d9e2

Browse files
committed
Initial Setup
1 parent ae2b3c2 commit cb5d9e2

2 files changed

Lines changed: 345 additions & 0 deletions

File tree

rss2.go

Lines changed: 227 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,227 @@
1+
//
2+
// rss2 is a golang package for working with RSS 2 feeds and documents.
3+
//
4+
// @author R. S. Doiel, <rsdoiel@caltech.edu>
5+
//
6+
// Copyright (c) 2016, Caltech
7+
// All rights not granted herein are expressly reserved by Caltech.
8+
//
9+
// Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
10+
//
11+
// 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
12+
//
13+
// 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
14+
//
15+
// 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
16+
//
17+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
18+
//
19+
package rss2
20+
21+
import (
22+
"encoding/xml"
23+
"fmt"
24+
"html/template"
25+
"strconv"
26+
"strings"
27+
)
28+
29+
type RSS2 struct {
30+
XMLName xml.Name `xml:"rss" json:"-"`
31+
Version string `xml:"version,attr" json:"version"`
32+
// Required
33+
Title string `xml:"channel>title" json:"title"`
34+
Link string `xml:"channel>link" json:"link"`
35+
Description string `xml:"channel>description" json:"description"`
36+
// Optional
37+
PubDate string `xml:"channel>pubDate" json:"pubDate,omitempty"`
38+
ItemList []Item `xml:"channel>item" json:"item,omitempty"`
39+
}
40+
41+
// Item models a single <item> element of an RSS 2 channel. Description and
// Content are typed as template.HTML rather than string; Content is read
// from the element whose local name is "encoded".
type Item struct {
	// Optional according to Dave Winer
	Title string `xml:"title" json:"title,omitempty"`
	// Required
	Link string `xml:"link" json:"link"`
	// Optional
	Description template.HTML `xml:"description" json:"description,omitempty"`
	Content     template.HTML `xml:"encoded" json:"encoded,omitempty"`
	PubDate     string        `xml:"pubDate" json:"pubDate,omitempty"`
	Comments    string        `xml:"comments" json:"comments,omitempty"`
}
52+
53+
// Parse return an RSS2 document as a RSS2 structure.
54+
func Parse(buf []byte) (*RSS2, error) {
55+
data := new(RSS2)
56+
err := xml.Unmarshal(buf, &data)
57+
if err != nil {
58+
return nil, err
59+
}
60+
return data, nil
61+
}
62+
63+
func (r *RSS2) channel(dataPath string) (map[string]interface{}, error) {
64+
results := make(map[string]interface{})
65+
switch {
66+
case strings.Compare(dataPath, ".channel") == 0:
67+
// package and return all the channel fields
68+
results[".title"] = r.Title
69+
results[".link"] = r.Link
70+
results[".description"] = r.Description
71+
if r.PubDate != "" {
72+
results[".pubDate"] = r.PubDate
73+
}
74+
case strings.HasSuffix(dataPath, ".title"):
75+
results[".title"] = r.Title
76+
case strings.HasSuffix(dataPath, ".link"):
77+
results[".link"] = r.Link
78+
case strings.HasSuffix(dataPath, ".description"):
79+
results[".description"] = r.Description
80+
case strings.HasSuffix(dataPath, ".pubDate"):
81+
results[".pubDate"] = r.PubDate
82+
default:
83+
return nil, fmt.Errorf("Unknown data path %s", dataPath)
84+
}
85+
return results, nil
86+
}
87+
88+
// rangeExpression is an inclusive span of list indexes, first through last.
type rangeExpression struct {
	first int
	last  int
}

// getRange parses exp, the selector text taken from between the brackets of
// an item data path, into a rangeExpression for a list of listLength
// elements.
//
// Two forms are recognized: "N" selects only index N, and "N-M" selects
// indexes N through M inclusive. A side of the dash that is missing or not a
// number keeps its default bound: 0 for first and listLength-1 for last. An
// empty or unparsable exp therefore selects the whole list.
//
// Stray brackets and blanks around exp are ignored, so "[1-3", "1-3]" and
// "1-3" all parse the same way. Without this, a selector that still carries
// its opening bracket would fail to parse and silently fall back to the
// defaults.
func getRange(listLength int, exp string) *rangeExpression {
	rexp := &rangeExpression{first: 0, last: listLength - 1}

	exp = strings.Trim(exp, "[] \t")
	if strings.Contains(exp, "-") {
		nums := strings.SplitN(exp, "-", 2)
		if i, err := strconv.Atoi(strings.TrimSpace(nums[0])); err == nil {
			rexp.first = i
		}
		if i, err := strconv.Atoi(strings.TrimSpace(nums[1])); err == nil {
			rexp.last = i
		}
		return rexp
	}

	if i, err := strconv.Atoi(exp); err == nil {
		rexp.first, rexp.last = i, i
	}
	return rexp
}

// inRange reports whether val lies within the inclusive span first..last.
func (rexp *rangeExpression) inRange(val int) bool {
	return rexp.first <= val && val <= rexp.last
}
124+
125+
func (r *RSS2) items(dataPath string) (map[string]interface{}, error) {
126+
rexp := new(rangeExpression)
127+
rexp.first = 0
128+
rexp.last = len(r.ItemList) - 1
129+
130+
// Get the range expression so we know when to add it to results.
131+
s := strings.Index(dataPath, "[")
132+
e := strings.Index(dataPath, "]")
133+
if s >= 0 && e >= 0 {
134+
rexp = getRange(len(r.ItemList), dataPath[s:e])
135+
}
136+
137+
results := make(map[string]interface{})
138+
switch {
139+
case strings.HasSuffix(dataPath, ".title") == true:
140+
vals := []string{}
141+
for i, item := range r.ItemList {
142+
if rexp.inRange(i) == true {
143+
vals = append(vals, item.Title)
144+
}
145+
}
146+
results["title"] = vals
147+
case strings.HasSuffix(dataPath, ".link") == true:
148+
vals := []string{}
149+
for i, item := range r.ItemList {
150+
if rexp.inRange(i) == true {
151+
vals = append(vals, item.Link)
152+
}
153+
}
154+
results["link"] = vals
155+
/*
156+
case strings.HasSuffix(dataPath, ".description") == true:
157+
vals := []string{}
158+
for i, item := range r.ItemList {
159+
if rexp.inRange(i) == true {
160+
vals = append(vals, item.Description)
161+
}
162+
}
163+
results["description"] = vals
164+
case strings.HasSuffix(dataPath, ".content") == true:
165+
vals := []string{}
166+
for i, item := range r.ItemList {
167+
if rexp.inRange(i) == true {
168+
vals = append(vals, item.Content)
169+
}
170+
}
171+
results["content"] = vals
172+
*/
173+
case strings.HasSuffix(dataPath, ".pubDate") == true:
174+
vals := []string{}
175+
for i, item := range r.ItemList {
176+
if rexp.inRange(i) == true {
177+
vals = append(vals, item.PubDate)
178+
}
179+
}
180+
results["pubDate"] = vals
181+
case strings.HasSuffix(dataPath, ".comments") == true:
182+
vals := []string{}
183+
for i, item := range r.ItemList {
184+
if rexp.inRange(i) == true {
185+
vals = append(vals, item.Comments)
186+
}
187+
}
188+
results["comments"] = vals
189+
}
190+
return results, nil
191+
}
192+
193+
// Filter given an RSS2 document return all the entries matching so we
194+
// can apply return each of the data paths requested.
195+
// e.g. .version, .channel.title, .channel.link, .item[].link,
196+
// .item[].guid, .item[].title, .item[].description
197+
func (r *RSS2) Filter(dataPaths []string) (map[string]interface{}, error) {
198+
var (
199+
err error
200+
data map[string]interface{}
201+
)
202+
result := make(map[string]interface{})
203+
for _, dataPath := range dataPaths {
204+
switch {
205+
case strings.Compare(dataPath, ".version") == 0:
206+
result["version"] = r.Version
207+
case strings.HasPrefix(dataPath, ".channel"):
208+
data, err = r.channel(dataPath)
209+
// Merge data into results keyed' by path
210+
for _, val := range data {
211+
result[dataPath] = val
212+
}
213+
case strings.HasPrefix(dataPath, ".item[]"):
214+
data, err = r.items(dataPath)
215+
// Merge data into results keyed' by path
216+
for _, val := range data {
217+
result[dataPath] = val
218+
}
219+
default:
220+
return nil, fmt.Errorf("path %q not found", dataPath)
221+
}
222+
}
223+
if result == nil {
224+
return nil, fmt.Errorf("No data paths found")
225+
}
226+
return result, err
227+
}

rss2_test.go

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
//
2+
// rss2 is a golang package for working with RSS 2 feeds and documents.
3+
//
4+
// @author R. S. Doiel, <rsdoiel@caltech.edu>
5+
//
6+
// Copyright (c) 2016, Caltech
7+
// All rights not granted herein are expressly reserved by Caltech.
8+
//
9+
// Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
10+
//
11+
// 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
12+
//
13+
// 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
14+
//
15+
// 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
16+
//
17+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
18+
//
19+
package rss2
20+
21+
import (
22+
"net/url"
23+
"strings"
24+
"testing"
25+
)
26+
27+
func TestRSS2(t *testing.T) {
28+
src := []byte(`<?xml version="1.0" encoding="utf-8" ?>
29+
<rss version="2.0" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:media="http://search.yahoo.com/mrss/">
30+
<channel>
31+
32+
<title>CaltechAUTHORS: Title matches "Molecules in solution". Results ordered -Date Deposited. </title>
33+
<link>http://authors.library.caltech.edu/</link>
34+
<atom:link xmlns:atom="http://www.w3.org/2005/Atom" rel="self" href="http://authors.library.caltech.edu/cgi/search/advanced/?output=RSS2&amp;title=Molecules+in+solution" type="application/rss+xml"></atom:link>
35+
<description>1. This is an institutional repository.
36+
2. CaltechAUTHORS holds all types of materials.
37+
3. Deposited items may include:
38+
(a) working drafts
39+
(b) submitted versions (as sent to journals for peer-review)
40+
(c) accepted versions (author's final peer-reviewed drafts)
41+
(d) published versions (publisher-created files)
42+
4. Items are individually tagged with:
43+
(a) their version type and date.
44+
(b) their peer-review status.
45+
(c) their publication status.
46+
5. Principal Languages: English
47+
</description><image>
48+
<url>http://authors.library.caltech.edu/images/codalogo.jpg</url>
49+
<title>CaltechAUTHORS: Title matches "Molecules in solution". Results ordered -Date Deposited. </title>
50+
<link>http://authors.library.caltech.edu/</link></image>
51+
<pubDate>Fri, 12 Aug 2016 15:00:00 -0700</pubDate>
52+
<lastBuildDate>Fri, 12 Aug 2016 15:00:00 -0700</lastBuildDate>
53+
<language>en</language>
54+
<copyright></copyright>
55+
<item>
56+
<pubDate>Mon, 25 Jul 2016 20:48:03 -0700</pubDate>
57+
<title> Flow-through Capture and in Situ Amplification Can Enable Rapid Detection of a Few Single Molecules of Nucleic Acids from Several Milliliters of Solution </title>
58+
<link>http://authors.library.caltech.edu/69188/</link>
59+
<guid>http://authors.library.caltech.edu/69188/</guid>
60+
<description> Schlappi, Travis S. and McCalla, Stephanie E. and Schoepp, Nathan G. and Ismagilov, Rustem F. (2016) Flow-through Capture and in Situ Amplification Can Enable Rapid Detection of a Few Single Molecules of Nucleic Acids from Several Milliliters of Solution. Analytical Chemistry . ISSN 0003-2700. (In Press) http://resolver.caltech.edu/CaltechAUTHORS:20160725-102649276 &lt;http://resolver.caltech.edu/CaltechAUTHORS:20160725-102649276&gt; </description></item>
61+
<item>
62+
<pubDate>Tue, 05 Aug 2014 15:26:26 -0700</pubDate>
63+
<title> Note on Dipole Moments of Molecules in Solution </title>
64+
<link>http://authors.library.caltech.edu/47953/</link>
65+
<guid>http://authors.library.caltech.edu/47953/</guid>
66+
<description> Bauer, S. H. (1936) Note on Dipole Moments of Molecules in Solution. Journal of Chemical Physics, 4 (7). pp. 458-459. ISSN 0021-9606. http://resolver.caltech.edu/CaltechAUTHORS:20140804-165648676 &lt;http://resolver.caltech.edu/CaltechAUTHORS:20140804-165648676&gt; </description></item>
67+
<item>
68+
<pubDate>Tue, 07 Aug 2012 17:07:28 -0700</pubDate>
69+
<title> Solution, surface, and single molecule platforms for the study of DNA-mediated charge transport </title>
70+
<link>http://authors.library.caltech.edu/32968/</link>
71+
<guid>http://authors.library.caltech.edu/32968/</guid>
72+
<description> Muren, Natalie B. and Olmon, Eric D. and Barton, Jacqueline K. (2012) Solution, surface, and single molecule platforms for the study of DNA-mediated charge transport. Physical Chemistry Chemical Physics, 14 (40). pp. 13754-13771. ISSN 1463-9076. PMCID PMC3478128. http://resolver.caltech.edu/CaltechAUTHORS:20120807-093450882 &lt;http://resolver.caltech.edu/CaltechAUTHORS:20120807-093450882&gt; </description></item>
73+
<item>
74+
<pubDate>Wed, 09 Sep 2009 18:17:34 -0700</pubDate>
75+
<title> Direct Emission of I_2 Molecule and IO Radical from the Heterogeneous Reactions of Gaseous Ozone with Aqueous Potassium Iodide Solution </title>
76+
<link>http://authors.library.caltech.edu/15526/</link>
77+
<guid>http://authors.library.caltech.edu/15526/</guid>
78+
<description> Sakamoto, Yosuke and Yabushita, Akihiro and Kawasaki, Masahiro and Enami, Shinichi (2009) Direct Emission of I_2 Molecule and IO Radical from the Heterogeneous Reactions of Gaseous Ozone with Aqueous Potassium Iodide Solution. Journal of Physical Chemistry A, 113 (27). pp. 7707-7713. ISSN 1089-5639. http://resolver.caltech.edu/CaltechAUTHORS:20090901-131930555 &lt;http://resolver.caltech.edu/CaltechAUTHORS:20090901-131930555&gt; </description><media:content url="http://authors.library.caltech.edu/15526/4/preview.png" type="image/png"/></item>
79+
<item>
80+
<pubDate>Fri, 29 Aug 2008 05:05:45 -0700</pubDate>
81+
<title> Unimolecular reaction rates in solution and in the isolated molecule: Comparison of diphenyl butadiene nonradiative decay in solutions and supersonic jets </title>
82+
<link>http://authors.library.caltech.edu/11478/</link>
83+
<guid>http://authors.library.caltech.edu/11478/</guid>
84+
<description> Courtney, S. H. and Fleming, G. R. and Khundkar, L. R. and Zewail, A. H. (1984) Unimolecular reaction rates in solution and in the isolated molecule: Comparison of diphenyl butadiene nonradiative decay in solutions and supersonic jets. Journal of Chemical Physics, 80 (9). pp. 4559-4560. ISSN 0021-9606. http://resolver.caltech.edu/CaltechAUTHORS:COUjcp84 &lt;http://resolver.caltech.edu/CaltechAUTHORS:COUjcp84&gt; </description><media:content url="http://authors.library.caltech.edu/11478/2/preview.png" type="image/png"/></item>
85+
<item>
86+
<title> The osmotic pressure of the ions and of the undissociated molecules of salts in aqueous solution </title>
87+
<link>http://authors.library.caltech.edu/3382/</link>
88+
<guid>http://authors.library.caltech.edu/3382/</guid>
89+
<description> Bates, Stuart J. (1915) The osmotic pressure of the ions and of the undissociated molecules of salts in aqueous solution. Proceedings of the National Academy of Sciences of the United States of America, 1 (6). pp. 363-368. ISSN 0027-8424. http://resolver.caltech.edu/CaltechAUTHORS:BATpnas15 &lt;http://resolver.caltech.edu/CaltechAUTHORS:BATpnas15&gt; </description><media:content url="http://authors.library.caltech.edu/3382/2/preview.png" type="image/png"/></item>
90+
</channel>
91+
</rss>`)
92+
93+
r, err := Parse(src)
94+
if err != nil {
95+
t.Error(err)
96+
t.FailNow()
97+
}
98+
results, err := r.Filter([]string{".item[].title"})
99+
if err != nil {
100+
t.Error(err)
101+
t.FailNow()
102+
}
103+
if len(results[".item[].title"].([]string)) != len(r.ItemList) {
104+
t.Errorf("Expected 6 .item[].title, got %s", strings.Join(results[".item[].title"].([]string), "\t"))
105+
t.FailNow()
106+
}
107+
results, err = r.Filter([]string{".item[].link"})
108+
if err != nil {
109+
t.Errorf("Expected 6 .item[].link, got %+v", strings.Join(results[".item[].title"].([]string), "\t"))
110+
t.FailNow()
111+
}
112+
for _, link := range results[".item[].link"].([]string) {
113+
_, err := url.Parse(link)
114+
if err != nil {
115+
t.Errorf("expected to parse link %q into url, %s", link, err)
116+
}
117+
}
118+
}

0 commit comments

Comments
 (0)