Skip to content
Open
225 changes: 112 additions & 113 deletions atom/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ var (
"uri": true,
"url": true, // atom 0.3
}

// No known explicit extension parsers for Atom, currently
emptyExtParsers = make(shared.ExtParsers)
)

// Parser is an Atom Parser
Expand All @@ -38,6 +41,14 @@ func (ap *Parser) Parse(feed io.Reader) (*Feed, error) {
return ap.parseRoot(p)
}

// ParseAsExtension parses the element the pull parser is currently positioned
// on as a piece of Atom entry content. The element is dispatched through
// parseEntryContent into a freshly allocated Entry, and a pointer to that
// Entry is returned as the parsed value. Any error reported by
// parseEntryContent is returned unchanged together with a nil value.
func (ap *Parser) ParseAsExtension(p *xpp.XMLPullParser) (interface{}, error) {
	var parsed Entry
	err := ap.parseEntryContent(p, &parsed)
	if err != nil {
		return nil, err
	}
	return &parsed, nil
}

func (ap *Parser) parseRoot(p *xpp.XMLPullParser) (*Feed, error) {
if err := p.Expect(xpp.StartTag, "feed"); err != nil {
return nil, err
Expand Down Expand Up @@ -69,7 +80,7 @@ func (ap *Parser) parseRoot(p *xpp.XMLPullParser) (*Feed, error) {
name := strings.ToLower(p.Name)

if shared.IsExtension(p) {
e, err := shared.ParseExtension(extensions, p)
e, err := shared.ParseExtension(extensions, p, emptyExtParsers)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -202,10 +213,6 @@ func (ap *Parser) parseEntry(p *xpp.XMLPullParser) (*Entry, error) {
}
entry := &Entry{}

contributors := []*Person{}
authors := []*Person{}
categories := []*Category{}
links := []*Link{}
extensions := ext.Extensions{}

for {
Expand All @@ -219,125 +226,20 @@ func (ap *Parser) parseEntry(p *xpp.XMLPullParser) (*Entry, error) {
}

if tok == xpp.StartTag {

name := strings.ToLower(p.Name)

if shared.IsExtension(p) {
e, err := shared.ParseExtension(extensions, p)
e, err := shared.ParseExtension(extensions, p, emptyExtParsers)
if err != nil {
return nil, err
}
extensions = e
} else if name == "title" {
result, err := ap.parseAtomText(p)
if err != nil {
return nil, err
}
entry.Title = result
} else if name == "id" {
result, err := ap.parseAtomText(p)
if err != nil {
return nil, err
}
entry.ID = result
} else if name == "rights" ||
name == "copyright" {
result, err := ap.parseAtomText(p)
if err != nil {
return nil, err
}
entry.Rights = result
} else if name == "summary" {
result, err := ap.parseAtomText(p)
if err != nil {
return nil, err
}
entry.Summary = result
} else if name == "source" {
result, err := ap.parseSource(p)
if err != nil {
return nil, err
}
entry.Source = result
} else if name == "updated" ||
name == "modified" {
result, err := ap.parseAtomText(p)
if err != nil {
return nil, err
}
entry.Updated = result
date, err := shared.ParseDate(result)
if err == nil {
utcDate := date.UTC()
entry.UpdatedParsed = &utcDate
}
} else if name == "contributor" {
result, err := ap.parsePerson("contributor", p)
if err != nil {
return nil, err
}
contributors = append(contributors, result)
} else if name == "author" {
result, err := ap.parsePerson("author", p)
if err != nil {
return nil, err
}
authors = append(authors, result)
} else if name == "category" {
result, err := ap.parseCategory(p)
if err != nil {
return nil, err
}
categories = append(categories, result)
} else if name == "link" {
result, err := ap.parseLink(p)
if err != nil {
return nil, err
}
links = append(links, result)
} else if name == "published" ||
name == "issued" {
result, err := ap.parseAtomText(p)
if err != nil {
return nil, err
}
entry.Published = result
date, err := shared.ParseDate(result)
if err == nil {
utcDate := date.UTC()
entry.PublishedParsed = &utcDate
}
} else if name == "content" {
result, err := ap.parseContent(p)
if err != nil {
return nil, err
}
entry.Content = result
} else {
err := p.Skip()
if err != nil {
if err := ap.parseEntryContent(p, entry); err != nil {
return nil, err
}
}
}
}

if len(categories) > 0 {
entry.Categories = categories
}

if len(authors) > 0 {
entry.Authors = authors
}

if len(links) > 0 {
entry.Links = links
}

if len(contributors) > 0 {
entry.Contributors = contributors
}

if len(extensions) > 0 {
entry.Extensions = extensions
}
Expand All @@ -349,6 +251,103 @@ func (ap *Parser) parseEntry(p *xpp.XMLPullParser) (*Entry, error) {
return entry, nil
}

// parseEntryContent handles the single element the pull parser is currently
// positioned on and stores its value on entry. The element name is matched
// case-insensitively; both the Atom 1.0 names and their older aliases
// (copyright, modified, issued) are accepted. Repeatable elements
// (contributor, author, category, link) are appended to the corresponding
// slice on entry. Elements with an unrecognised name are skipped.
//
// The first error returned by a sub-parser (or by Skip) is returned as-is.
// A date string that fails to parse is not an error: the raw text is still
// stored and the corresponding *Parsed field is simply left untouched.
func (ap *Parser) parseEntryContent(p *xpp.XMLPullParser, entry *Entry) error {
	switch strings.ToLower(p.Name) {
	case "title":
		text, err := ap.parseAtomText(p)
		if err != nil {
			return err
		}
		entry.Title = text

	case "id":
		text, err := ap.parseAtomText(p)
		if err != nil {
			return err
		}
		entry.ID = text

	case "rights", "copyright":
		text, err := ap.parseAtomText(p)
		if err != nil {
			return err
		}
		entry.Rights = text

	case "summary":
		text, err := ap.parseAtomText(p)
		if err != nil {
			return err
		}
		entry.Summary = text

	case "source":
		src, err := ap.parseSource(p)
		if err != nil {
			return err
		}
		entry.Source = src

	case "updated", "modified":
		text, err := ap.parseAtomText(p)
		if err != nil {
			return err
		}
		entry.Updated = text
		// Keep the raw string even when it cannot be parsed as a date.
		if when, dateErr := shared.ParseDate(text); dateErr == nil {
			utc := when.UTC()
			entry.UpdatedParsed = &utc
		}

	case "contributor":
		person, err := ap.parsePerson("contributor", p)
		if err != nil {
			return err
		}
		entry.Contributors = append(entry.Contributors, person)

	case "author":
		person, err := ap.parsePerson("author", p)
		if err != nil {
			return err
		}
		entry.Authors = append(entry.Authors, person)

	case "category":
		cat, err := ap.parseCategory(p)
		if err != nil {
			return err
		}
		entry.Categories = append(entry.Categories, cat)

	case "link":
		link, err := ap.parseLink(p)
		if err != nil {
			return err
		}
		entry.Links = append(entry.Links, link)

	case "published", "issued":
		text, err := ap.parseAtomText(p)
		if err != nil {
			return err
		}
		entry.Published = text
		// Keep the raw string even when it cannot be parsed as a date.
		if when, dateErr := shared.ParseDate(text); dateErr == nil {
			utc := when.UTC()
			entry.PublishedParsed = &utc
		}

	case "content":
		body, err := ap.parseContent(p)
		if err != nil {
			return err
		}
		entry.Content = body

	default:
		// Unknown element: step over it entirely.
		return p.Skip()
	}

	return nil
}

func (ap *Parser) parseSource(p *xpp.XMLPullParser) (*Source, error) {

if err := p.Expect(xpp.StartTag, "source"); err != nil {
Expand Down Expand Up @@ -378,7 +377,7 @@ func (ap *Parser) parseSource(p *xpp.XMLPullParser) (*Source, error) {
name := strings.ToLower(p.Name)

if shared.IsExtension(p) {
e, err := shared.ParseExtension(extensions, p)
e, err := shared.ParseExtension(extensions, p, emptyExtParsers)
if err != nil {
return nil, err
}
Expand Down
2 changes: 1 addition & 1 deletion cmd/ftest/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ func main() {
if strings.EqualFold(feedType, "rss") ||
strings.EqualFold(feedType, "r") {
p := rss.Parser{}
feed, err = p.Parse(strings.NewReader(fc))
feed, err = p.Parse(strings.NewReader(fc), gofeed.NewParser().BuildRSSExtParsers())
} else if strings.EqualFold(feedType, "atom") ||
strings.EqualFold(feedType, "a") {
p := atom.Parser{}
Expand Down
5 changes: 5 additions & 0 deletions extensions/extensions.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@ type Extension struct {
Value string `json:"value"`
Attrs map[string]string `json:"attrs"`
Children map[string][]Extension `json:"children"`
Parsed interface{} `json:"parsed,omitempty"`
}

// Extendable is implemented by types that can hand back a set of
// Extensions through GetExtensions.
type Extendable interface {
// GetExtensions returns the Extensions associated with the value.
GetExtensions() Extensions
}

func parseTextExtension(name string, extensions map[string][]Extension) (value string) {
Expand Down
35 changes: 33 additions & 2 deletions internal/shared/extparser.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,12 @@ import (
"github.com/mmcdole/goxpp"
)

// ExtParser is implemented by parsers that can take over parsing of an
// extension element instead of the generic extension-element parser.
type ExtParser interface {
// ParseAsExtension parses the element the pull parser is currently
// positioned on and returns the parsed value, or an error. The returned
// value is stored in the Parsed field of the resulting ext.Extension.
ParseAsExtension(p *xpp.XMLPullParser) (interface{}, error)
}

// ExtParsers maps a namespace prefix (as resolved by prefixForNamespace) to
// the ExtParser that should handle elements carrying that prefix.
type ExtParsers map[string]ExtParser

// IsExtension returns whether or not the current
// XML element is an extension element (if it has a
// non empty prefix)
Expand All @@ -22,10 +28,16 @@ func IsExtension(p *xpp.XMLPullParser) bool {
// ParseExtension parses the current element of the
// XMLPullParser as an extension element and updates
// the extension map
func ParseExtension(fe ext.Extensions, p *xpp.XMLPullParser) (ext.Extensions, error) {
func ParseExtension(fe ext.Extensions, p *xpp.XMLPullParser, extParsers ExtParsers) (ext.Extensions, error) {
prefix := prefixForNamespace(p.Space, p)

result, err := parseExtensionElement(p)
var result ext.Extension
var err error
if extParser, ok := extParsers[prefix]; ok {
result, err = parseExtensionFromParser(p, extParser)
} else {
result, err = parseExtensionElement(p)
}
if err != nil {
return nil, err
}
Expand All @@ -43,6 +55,23 @@ func ParseExtension(fe ext.Extensions, p *xpp.XMLPullParser) (ext.Extensions, er
return fe, nil
}

// parseExtensionFromParser builds an ext.Extension for the current element by
// delegating the element's parsing to extParser. The pull parser must be on a
// start tag when called. The extension's Name is taken from that tag and its
// Parsed field receives whatever extParser returns. After delegation the pull
// parser is expected to be on the end tag of the same name.
//
// On any failure the partially filled extension is returned alongside the
// error.
func parseExtensionFromParser(p *xpp.XMLPullParser, extParser ExtParser) (e ext.Extension, err error) {
	err = p.Expect(xpp.StartTag, "*")
	if err != nil {
		return e, err
	}

	// Remember the tag name now; p.Name may change while the delegate runs.
	tag := p.Name
	e.Name = tag

	parsed, err := extParser.ParseAsExtension(p)
	e.Parsed = parsed
	if err != nil {
		return e, err
	}

	err = p.Expect(xpp.EndTag, tag)
	if err != nil {
		return e, err
	}

	return e, nil
}

func parseExtensionElement(p *xpp.XMLPullParser) (e ext.Extension, err error) {
if err = p.Expect(xpp.StartTag, "*"); err != nil {
return e, err
Expand Down Expand Up @@ -121,6 +150,8 @@ func prefixForNamespace(space string, p *xpp.XMLPullParser) string {
// These canonical prefixes override any prefixes used in the feed itself.
var canonicalNamespaces = map[string]string{
"http://webns.net/mvcb/": "admin",
"http://www.w3.org/2005/Atom": "atom",
"http://purl.org/atom/ns#": "atom03",
"http://purl.org/rss/1.0/modules/aggregation/": "ag",
"http://purl.org/rss/1.0/modules/annotate/": "annotate",
"http://media.tangent.org/rss/1.0/": "audio",
Expand Down
Loading