;;; sb-heise.el --- heise online shimbun backend ;; Copyright (C) 2004, 2005, 2008, 2009 David Hansen ;; Author: David Hansen ;; Keywords: news ;; This file is a part of shimbun. ;; This is free software; you can redistribute it and/or modify ;; it under the terms of the GNU General Public License as published by ;; the Free Software Foundation; either version 2, or (at your option) ;; any later version. ;; This is distributed in the hope that it will be useful, ;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;; GNU General Public License for more details. ;; You should have received a copy of the GNU General Public License ;; along with GNU Emacs; see the file COPYING. If not, write to the ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ;; Boston, MA 02110-1301, USA. ;;; Commentary: ;;; Code: (require 'shimbun) (luna-define-class shimbun-heise (shimbun) ()) (defvar shimbun-heise-url "http://www.heise.de") (defvar shimbun-heise-group-path-alist '(("news" . "/newsticker/") ("telepolis" . "/tp/"))) (defvar shimbun-heise-content-start "\\(\\|\\|<[^>]*HEISETEXT[^>]*>\\)") (defvar shimbun-heise-content-end "\\(\\|\ \\|<[^>]*/HEISETEXT[^>]*>\\)") (defvar shimbun-heise-x-face-alist '(("default" . "X-Face: #RVD(kjrS;RY\"2yH]w.1U,ZC_DbR,9{tQnhyYe|,\\J)\"\ C*o1{%`*]WwtAuo;reeq_koTr=oIKXFB4#bS'tSdz.Mc%t~-@873uYV>SMjL7D6K$M4L0Up{D\ _rBgD*Xj,t;iPKWh:!B}ijDOoCxs!}rs&(r-TLwU8=>@[w^H(>^u$wM*}\":9LANQs)1\"cZP\ 6ftp?9>b&|rkGR+VWIlD:%?,Fvi8h?q2H+pVqq5#Z9*k2q7.P)0$x!A)T"))) (defvar shimbun-heise-groups (mapcar 'car shimbun-heise-group-path-alist)) (luna-define-method shimbun-index-url ((shimbun shimbun-heise)) (concat shimbun-heise-url (cdr (assoc (shimbun-current-group-internal shimbun) shimbun-heise-group-path-alist)))) (defun shimbun-heise-get-newsticker-headers (shimbun) (let ((regexp "]*>\\([^<]+\\)") (from "Heise Online News ") (date "") (longurl) (id) (url) (subject) (headers)) (catch 'stop (while (re-search-forward regexp nil t nil) (setq longurl (match-string 1)) (setq id (md5 longurl)) (setq url (shimbun-expand-url longurl (shimbun-index-url shimbun))) (setq subject (match-string 2)) (setq id (concat "")) (when (shimbun-search-id shimbun id) (throw 'stop nil)) (push (shimbun-make-header 0 (shimbun-mime-encode-string subject) (shimbun-mime-encode-string from) date id "" 0 0 url) headers))) headers)) (defconst shimbun-heise-date-re "]+class=\"date-cell\"\\s-*>\\sw*,\ \\s-*\\([0-9]+\\)\\.\\([0-9]+\\)\\.\\([0-9]+\\)\\s-*") (defconst shimbun-heise-author-re "\\(.*?\\)

") (defconst shimbun-heise-url-re ".*?\

") (defun shimbun-heise-get-telepolis-headers (shimbun) (let (headers (limit (re-search-forward shimbun-heise-date-re nil t))) (catch 'stop (while limit (goto-char limit) (let ((day (match-string 1)) (month (match-string 2)) (year (match-string 3))) (setq limit (save-excursion (re-search-forward shimbun-heise-date-re nil t))) (save-match-data (while (re-search-forward shimbun-heise-url-re limit t) (let ((url (match-string 1)) (mid (concat "<" (match-string 2) "x" (match-string 3) "@heise.de>")) (subj (match-string 4))) (when (shimbun-search-id shimbun mid) (throw 'stop nil)) (when (re-search-forward shimbun-heise-author-re limit t) (let ((author (match-string 1))) (push (shimbun-create-header 0 subj author (shimbun-make-date-string (string-to-number year) (string-to-number month) (string-to-number day) "00:00" ;; FIXME: timezone is always wrong, slightly better ;; than the default "+0900" "+0000") mid "" 0 0 url) headers))))))))) headers)) (luna-define-method shimbun-get-headers ((shimbun shimbun-heise) &optional range) (if (equal (shimbun-current-group-internal shimbun) "news") (shimbun-heise-get-newsticker-headers shimbun) (shimbun-heise-get-telepolis-headers shimbun))) (defun shimbun-heise-wash-newsticker-article (header) (save-excursion ;; get the real date (let ((regexp-date-begin "
") (regexp-date-end "
") (regexp-date (concat "\\([0-9]+\\)\\.\\([0-9]+\\)\\." "\\([0-9]+\\)[ \t]+\\([0-9]+\\:[0-9]+\\)")) (tmp-point) (bound-point)) (when (setq tmp-point (re-search-forward regexp-date-begin nil t nil)) (when (setq bound-point (re-search-forward regexp-date-end nil t nil)) (goto-char tmp-point) (when (re-search-forward regexp-date bound-point t nil) (shimbun-header-set-date header (shimbun-make-date-string (string-to-number (match-string 3)) ; year (string-to-number (match-string 2)) ; month (string-to-number (match-string 1)) ; day (match-string 4) ; time ;; FIXME: timezone is always wrong, slightly better than the ;; default "+0900" "+0000")))))) ;; get the real from (let ((regexp-from-begin "") (regexp-from-end "
"))))))) (shimbun-remove-tags "" "
") (shimbun-remove-tags "" ""))) (defun shimbun-heise-wash-telepolis-article (header) (save-excursion ;; strip nasty "download" images (goto-char (point-min)) (while (re-search-forward "" nil t nil) (delete-region (point) (re-search-forward "" nil t nil))) (goto-char (point-min)) (while (re-search-forward "