elisp-vcs/emacs-w3m/shimbun/sb-zeit-de.el
2010-08-17 08:59:01 +02:00

130 lines
4.6 KiB
EmacsLisp

;;; sb-zeit-de.el --- shimbun backend for <http://www.zeit.de>
;; Copyright (C) 2004, 2005, 2006, 2008, 2009
;; Andreas Seltenreich <seltenreich@gmx.de>
;; Author: Andreas Seltenreich <seltenreich@gmx.de>
;; Keywords: news
;; Created: May 23, 2004
;; This program is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2, or (at your option)
;; any later version.
;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;; You should have received a copy of the GNU General Public License
;; along with this program; see the file COPYING. If not, write to
;; the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
;; Boston, MA 02110-1301, USA.
;;; Commentary:
;; Macro used to extract groups from the overview-page
;; (fset 'sb-zeit-de-macro [?\C-s ?d ?e ?/ ?\C-m ?\C- ?\C-a ?\C-w ?\"
;; ?\M-f ?\" ?\C-k ?\C-k ?\C-k return ?\C-k])
;;; Code:
(require 'shimbun)
(require 'sb-rss)
;; Backend class for this file.  It lists `shimbun-rss' as its only
;; parent and declares an empty slot list.
(luna-define-class shimbun-zeit-de (shimbun-rss) ())
(defvar shimbun-zeit-de-groups
  '("politik"
    "wirtschaft"
    "meinung"
    "gesellschaft"
    "kultur"
    "wissen"
    "digital"
    "studium"
    "karriere"
    "lebensart"
    "reisen"
    "auto"
    "sport"
    "blogs"
    "news")
  "List of group names offered by the zeit.de shimbun backend.
This list is what the `shimbun-groups' method of `shimbun-zeit-de'
returns.")
;; Alist whose single entry maps the key "default" to an "X-Face: ..."
;; header line.
;; NOTE(review): the key is presumably a group name, with "default" as
;; the fallback -- confirm against shimbun.el.
;; The backslash at the very end of the first string line is a line
;; continuation, so no newline ends up in the value; the continuation
;; line must therefore stay at column 0.
(defvar shimbun-zeit-de-x-face-alist
'(("default" . "X-Face: +@u:6eD3Nq>u{P_Ev&\"A6eW=EA{5H[OqH;|oz7H>atafNFsUS-&7\
%\\qo;KFS%E`=t5Z)'q~lhfl6<7rQ=]")))
(defvar shimbun-zeit-de-content-start
  "title\">\\|<!--content starts here-->\\(?:<table[^>]+>\\)?"
  "Regexp marking where the content of a zeit.de article starts.
It matches either the text `title\">' or the comment
`<!--content starts here-->', the latter optionally followed by an
opening <table ...> tag.")
(defvar shimbun-zeit-de-content-end
  (concat
   "</body>\\|</html>\\|navigation[^><]*>[^A]\\|"
   ;; The dot of "JavaScript1.2" needs a doubled backslash: inside a
   ;; Lisp string "\." is read as a bare ".", which as a regexp would
   ;; match any character instead of a literal dot.
   "<script language=\"JavaScript1\\.2\" type=\"text/javascript\">\\|"
   ;; Alternation is spelled "\\|" throughout (the former "\\\|" only
   ;; produced the same regexp by accident).
   "<div[^>]+\\(class\\|id\\)=\"comments\\|<li class=\"bookmarks\\|"
   "class=\"com\"\\|class=\"toolad\"")
  "Regexp marking where the content of a zeit.de article ends.
It is an alternation of page markers: the closing body/html tags, a
navigation element, a JavaScript1.2 script tag, the comments <div>,
the bookmarks list item, and elements with class \"com\" or
\"toolad\".")
(defvar shimbun-zeit-de-from-address
  "DieZeit@zeit.de"
  "Sender address string configured for the zeit.de shimbun backend.")
;; `:before' method on `shimbun-headers' for this backend.  Its body
;; only evaluates SHIMBUN and never touches RANGE, so it does no work
;; of its own.
;; NOTE(review): looks like a leftover placeholder -- confirm that
;; nothing relies on its presence before removing it.
(luna-define-method shimbun-headers :before ((shimbun shimbun-zeit-de)
&rest range)
shimbun)
;; Return the groups of this backend, i.e. the value of the variable
;; `shimbun-zeit-de-groups'.  SHIMBUN itself is not inspected.
(luna-define-method shimbun-groups ((shimbun shimbun-zeit-de))
shimbun-zeit-de-groups)
(luna-define-method shimbun-get-headers :around ((shimbun shimbun-zeit-de)
                                                 &optional range)
  ;; Rewrite the xref of every header produced by the next method and
  ;; return that same header list (`mapc' returns its list argument).
  (mapc
   (lambda (entry)
     (let* ((raw (shimbun-header-xref entry))
            ;; Strip a trailing "?from=rss" parameter, if there is one.
            (link (if (string-match "\\(.*\\)\\?from=rss$" raw)
                      (match-string 1 raw)
                    raw)))
       (cond
        ;; Absolute www.zeit.de link: store it with "?page=all" appended
        ;; (presumably the single-page view of the article).
        ((string-match "\\`http://www\\.zeit\\.de" link)
         (shimbun-header-set-xref entry (concat link "?page=all")))
        ;; Site-relative link: prefix the host to make it absolute.
        ((string-match "\\`/" link)
         (shimbun-header-set-xref entry
                                  (concat "http://www.zeit.de" link))))
       ;; Any other link is left exactly as the next method stored it.
       ))
   (luna-call-next-method)))
(luna-define-method shimbun-make-contents :before ((shimbun shimbun-zeit-de)
                                                   header)
  ;; Prepare the article buffer before the contents are built:
  ;;  1. If the page carries a JavaScript redirect
  ;;     (window.location='URL';), discard the buffer and retrieve
  ;;     URL with "?page=all" appended instead.
  ;;  2. Look for a "(c) ... ZEIT ... DD.MM.20YY" stamp and, when one
  ;;     is found, store it as the date of HEADER.
  ;;  3. Leave point at the beginning of the buffer.
  (when (re-search-forward "<script.*window.location='\\(.+?\\)';" nil t)
    (let ((url (match-string 1)))
      (erase-buffer)
      (shimbun-retrieve-url (concat url "?page=all"))))
  (let* ((case-fold-search t)
         ;; Position just after the content-start marker, or nil.
         (start (re-search-forward (shimbun-content-start shimbun) nil t))
         ;; Beginning of the first content-end marker after START, or
         ;; nil.  Point is moved back to START so that the date search
         ;; below scans the article body.
         (end (and start
                   (re-search-forward (shimbun-content-end shimbun) nil t)
                   (prog1
                       (match-beginning 0)
                     (goto-char start)))))
    ;; The date stamp is matched case-sensitively.
    (let ((case-fold-search nil))
      (when (re-search-forward
             (concat "(c)[^Z]*ZEIT[^0-9]*"
                     "\\([0-3][0-9]\\)\\.\\([01][0-9]\\)\\.\\(20[0-9][0-9]\\)")
             end t)
        (shimbun-header-set-date
         header
         ;; Groups are day (1), month (2) and year (3).
         ;; NOTE(review): the zone is hard-coded to "+02:00" (CEST);
         ;; stamps written during winter time would be one hour off.
         (shimbun-make-date-string (string-to-number (match-string 3))
                                   (string-to-number (match-string 2))
                                   (string-to-number (match-string 1))
                                   nil
                                   "+02:00"))))
    ;; Always restore point.  This used to happen only when a date had
    ;; been found, which left point in the middle of the buffer for
    ;; articles without a date stamp.
    (goto-char (point-min))))
(luna-define-method shimbun-index-url ((shimbun shimbun-zeit-de))
  ;; Build the feed URL for the current group: the "news" group maps
  ;; to the feed root, every other group to "<root><group>/index".
  (let ((feed-root "http://newsfeed.zeit.de/")
        (name (shimbun-current-group shimbun)))
    (cond ((equal "news" name) feed-root)
          (t (concat feed-root name "/index")))))
(luna-define-method shimbun-clear-contents :after ((shimbun shimbun-zeit-de)
                                                   header)
  ;; Remove advertisements and 1-pixel images (aka web bugs).  Each
  ;; entry of the table is the argument list for one call to
  ;; `shimbun-remove-tags': a start pattern, optionally followed by an
  ;; end pattern.  The calls are made in table order.
  (mapc
   (lambda (tag-args)
     (apply 'shimbun-remove-tags tag-args))
   '(("<!--START: LESERMEINUNG-->" "<!--ENDE: LESERMEINUNG-->")
     ("<div[^>]*class=\"?\\(?:ad\\|most_read\\)" "</div>")
     ("<a[^>]*doubleclick.net" "</a>")
     ("<IFRAME[^>]*doubleclick.net[^>]*>")
     ("<img[^>]*doubleclick.net[^>]*>")
     ("<img[^>]*\\(width\\|height\\)=\"1px\"[^>]*>")
     ("<tr><td[^>]*>Anzeige</td></tr>")
     ("<span class=\"anzeige\">.+?</span>")))
  ;; The method's value is always t.
  t)
(provide 'sb-zeit-de)
;;; sb-zeit-de.el ends here