;;; sb-yomiuri.el --- shimbun backend for www.yomiuri.co.jp -*- coding: iso-2022-7bit; -*-
;; Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 Authors
;; Author: TSUCHIYA Masatoshi,
;;         Yuuichi Teranishi,
;;         Katsumi Yamaoka
;; Keywords: news
;;; Copyright:
;; This program is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2, or (at your option)
;; any later version.
;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;; You should have received a copy of the GNU General Public License
;; along with this program; see the file COPYING. If not, write to
;; the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
;; Boston, MA 02110-1301, USA.
;;; Commentary:
;; Original code was nnshimbun.el written by
;; TSUCHIYA Masatoshi.
;;; Code:
(eval-when-compile (require 'cl)) ;; caddr, cadddr, cddddr.
(require 'shimbun)
;; Backend class for this site.  It names `shimbun-japanese-newspaper' and
;; `shimbun' as parent classes and passes an empty slot list.
(luna-define-class shimbun-yomiuri (shimbun-japanese-newspaper shimbun) ())
;; Preference flag, defaulting to t.  The leading `*' in the docstring is
;; the old-style marker for a user-settable variable.
;; NOTE(review): nothing in this file reads the variable by name; it is
;; presumably looked up by the shimbun framework -- confirm in shimbun.el.
(defvar shimbun-yomiuri-prefer-text-plain t
"*Non-nil means prefer text/plain articles rather than html articles.")
;; Domain name without scheme or host prefix; `shimbun-yomiuri-url' is
;; built from it.
(defvar shimbun-yomiuri-top-level-domain "yomiuri.co.jp"
"Name of the top level domain for the Yomiuri On-line.")
;; Root URL: "http://www." + `shimbun-yomiuri-top-level-domain' + "/".
;; The value is computed when this `defvar' is first evaluated, so changing
;; the domain variable afterwards does not update it.
(defvar shimbun-yomiuri-url
(concat "http://www." shimbun-yomiuri-top-level-domain "/")
"Name of the parent url.")
;; Table mapping each group name to its Japanese label and, optionally, an
;; index page, a regexp and a list of match-group numbers.  The docstring
;; gives the meaning of each position in the number list.
;;
;; The `let*' builds the shared pieces first:
;;   s0       - regexp for optional whitespace (tab, newline, space).
;;   s1       - regexp for mandatory whitespace; it is not referenced
;;              anywhere in the visible body.
;;   default  - list of a regexp followed by numbers; the regexp captures
;;              subject, month, day and hour:minute.
;;   default2 - same layout, but the regexp captures subject, month and
;;              day only.
;;   kyoiku   - `default2' with its regexp passed through `format'.
;; Entries splice these in with `,@' after the index-page element.
;;
;; NOTE(review): the inline comments number the subject group as 5 or
;; later, yet the visible regexp fragments contain fewer groups than that
;; and no "%s" token for `format' to fill.  The HTML-tag portions of these
;; patterns look like they were lost (see also the string broken across
;; two lines in the "sports" entry) -- TODO confirm against upstream before
;; relying on the group numbers.
(defvar shimbun-yomiuri-group-table
(let* ((s0 "[\t\n ]*")
(s1 "[\t\n ]+")
(default
(list
(concat
"]*>" s0
;; 5. subject
"\\([^<]+\\)"
s0 "[^<(]*(" s0
;; 6. month
"\\([01]?[0-9]\\)"
s0 "月" s0
;; 7. day
"\\([0-3]?[0-9]\\)"
s0 "日" s0
;; 8. hour:minute
"\\([012][0-9]:[0-5][0-9]\\)")
1 2 4 3 5 6 7 8))
(default2
(list
(concat
"]*>" s0
;; 5. subject
"\\([^<]+\\)"
s0 "" s0 ""
"\\(?:" s0 "]+>[^<]+\\)?"
s0 "" s0
;; 6. month
"\\([01]?[0-9]\\)"
s0 "月" s0 "" s0 "" s0
;; 7. day
"\\([0-3]?[0-9]\\)"
s0 "日" s0 "")
1 2 4 3 5 6 7))
(kyoiku
(cons (format (car default2) "kyoiku\\(?:/[^\"./]+\\)+")
(cdr default2))))
`(("atmoney" "マネー・経済")
("editorial" "社説・コラム" ""
,(concat "]*>" s0
;; 5. month
;; 6. ja month
"\\(?:\\([01]?[0-9]\\)\\|\\([01]?[0-9]\\)\\)"
"月"
;; 7. day
;; 8. ja day
"\\(?:\\([0-3]?[0-9]\\)\\|\\([0-3]?[0-9]\\)\\)"
"日付[\t\n ・]*"
;; 9. subject
"\\([^<]+\\)"
s0)
1 2 4 3 9 5 7 nil 6 8)
("entertainment" "エンターテインメント")
("iryou" "医療と介護" "news/"
,(concat ">" s0
;; 1. ja genre
"\\(\\cj+\\)"
"[^<]*[^<]*]*>" s0
;; 9. subject
"\\([^<]+\\)"
s0)
2 4 8 5 9 6 7 nil nil nil 3 1)
("kyoiku" "教育" "" ,@kyoiku)
("kyoiku.children" "こども")
("kyoiku.english" "英語"
"http://www.yomiuri.co.jp/kyoiku/learning/english/"
,@kyoiku)
("kyoiku.qanda" "教育Q&A")
("kyoiku.renaissance" "教育ルネサンス"
"http://www.yomiuri.co.jp/kyoiku/renai/")
("kyoiku.special" "特集")
("national" "社会" "" ,@default)
("politics" "政治" "" ,@default)
("science" "科学" "" ,@default)
("sports" "スポーツ" ""
,(concat
"]*>" s0
;; 6. subject
"\\([^<]+\\)"
"\\(?:" s0 "
]+>\\)?" s0 "[^<(]*(" s0
;; 7. month
"\\([01]?[0-9]\\)"
s0 "月" s0
;; 8. day
"\\([0-3]?[0-9]\\)"
s0 "日" s0
;; 9. hour:minute
"\\([012][0-9]:[0-5][0-9]\\)")
1 3 5 4 6 7 8 9 nil nil 2)
("world" "国際" "" ,@default)))
"Alist of group names, their Japanese translations, index pages,
regexps and numbers.
Regexp may contain the \"%s\" token which is replaced with a
regexp-quoted group name. Numbers point to the search result in order
of [0]url, [1,2]serial numbers, [3]year, [4]subject, [5]month, [6]day,
\[7]hour:minute, [8]ja month, [9]ja day, [10]genre and [11]ja genre.")
;; Subgroup tables keyed by parent group name.  Each subgroup entry is a
;; list of a Japanese label, a subgenre name string (or nil), an index URL,
;; a regexp, and then the match-group numbers in the order described in the
;; docstring of `shimbun-yomiuri-group-table'.
;;
;; The `let*' defines regexp/number templates that entries reuse by calling
;; `format' on the `car' (the regexp) and splicing the `cdr' (the numbers):
;;   s0            - optional whitespace.
;;   s1            - mandatory whitespace; not referenced in the visible
;;                   body.
;;   default       - subject, month, day, and an optional hour:minute.
;;   default2      - subject only, at number position 7.
;;   default3      - subject only, at position 8, with a genre number
;;                   in the last list position.
;;   default4      - subject, month and day.
;;   entertainment - Japanese genre taken from an alt="..." attribute,
;;                   then subject, month and day.
;;
;; NOTE(review): as in `shimbun-yomiuri-group-table', the visible regexps
;; contain fewer groups than the numbering comments imply and no "%s" token
;; for `format' to fill; the HTML-tag portions appear to be missing --
;; TODO confirm against upstream before editing any group numbers.
(defvar shimbun-yomiuri-subgroups-alist
(let* ((s0 "[\t\n ]*")
(s1 "[\t\n ]+")
(default
(list
(concat
"]*>" s0
;; 5. subject
"\\([^<]+\\)"
s0 "[^<(]*(" s0
;; 6. month
"\\([01]?[0-9]\\)"
s0 "月" s0
;; 7. day
"\\([0-3]?[0-9]\\)"
s0 "日\\(?:" s0
;; 8. hour:minute
"\\([012][0-9]:[0-5][0-9]\\)\\)?")
1 2 4 3 5 6 7 8))
(default2
(list
(concat
"]*>" s0
;; 7. subject
"\\([^<]+\\)"
s0)
1 2 6 3 7 4 5))
(default3
(list
(concat
"]*>" s0
;; 8. subject
"\\([^<]+\\)"
s0)
1 3 7 4 8 5 6 nil nil nil 2))
(default4
(list
(concat
"]*>" s0
;; 5. subject
"\\([^<]+\\)"
s0 "" s0 ""
"\\(?:" s0 "]+>[^<]+\\)?"
s0 "" s0
;; 6. month
"\\([01]?[0-9]\\)"
s0 "月" s0 "" s0 "" s0
;; 7. day
"\\([0-3]?[0-9]\\)"
s0 "日" s0 "")
1 2 4 3 5 6 7))
(entertainment
(list
(concat
"alt=\""
;; 1. ja genre
"\\([^\"]+\\)"
"\"[^>]+>" s0 "\\(?:" s0 "[^>]+>\\)?[^<]*]*>" s0
;; 7. subject
"\\([^<]+\\)"
s0 "[^<(]*(" s0
;; 8. month
"\\([01]?[0-9]\\)"
s0 "月" s0
;; 9. day
"\\([0-3]?[0-9]\\)"
s0 "日")
2 4 6 5 7 8 9 nil nil nil 3 1)))
`(("atmoney"
("宝くじ" "lottery" "http://www.yomiuri.co.jp/atmoney/lottery/"
,(format (car default2) "atmoney/lottery") ,@(cdr default2))
("金融ニュース" "mnews" "http://www.yomiuri.co.jp/atmoney/mnews/"
,(format (car default) "atmoney/mnews") ,@(cdr default))
("経済ニュース" "news" "http://www.yomiuri.co.jp/atmoney/news/"
,(format (car default) "atmoney/news") ,@(cdr default))
("新製品情報" "pnews" "http://www.yomiuri.co.jp/atmoney/pnews/"
,(format (car default) "atmoney/pnews") ,@(cdr default)))
("entertainment"
("映画" nil "http://www.yomiuri.co.jp/entertainment/cinema/"
,(format (car entertainment) "cinema") ,@(cdr entertainment))
("donna" "donna" "http://www.yomiuri.co.jp/donna/"
,(concat
"]*>" s0
;; 7. subject
"\\([^<]+\\)")
1 2 3 4 7 5 6)
("DVD情報" "dvd" "http://www.yomiuri.co.jp/entertainment/cinema/dvd/"
,(format (car default) "entertainment/cinema/dvd") ,@(cdr default))
("ジブリをいっぱい" "ghibli"
"http://www.yomiuri.co.jp/entertainment/ghibli/"
,(concat
"]*>" s0
;; 5. subject
"\\([^<]+\\)"
"" s0 "("
;; 6. year
"\\(20[0-9][0-9]\\)" "年"
;; 7. month
"\\([01]?[0-9]\\)" "月"
;; 8. day
"\\([0-3]?[0-9]\\)" "日")
1 3 4 6 5 7 8 nil nil nil 2)
("ヘザーの映画館" "heather"
"http://www.yomiuri.co.jp/entertainment/heather/"
,(format (car entertainment) "heather") ,@(cdr entertainment))
("音楽" "music" "http://www.yomiuri.co.jp/entertainment/music/"
,(format (car default3) "entertainment/music") ,@(cdr default3))
("ニュース" "news" "http://www.yomiuri.co.jp/entertainment/news/"
,(format (car default) "entertainment/news") ,@(cdr default))
("舞台" "stage" "http://www.yomiuri.co.jp/entertainment/stage/"
,(format (car entertainment) "stage") ,@(cdr entertainment))
("TV" "tv" "http://www.yomiuri.co.jp/entertainment/tv/"
,(format (car default) "entertainment/tv") ,@(cdr default))
("Y&Yテレビ" "yy" "http://www.yomiuri.co.jp/entertainment/yy/"
,(format (car default3) "entertainment/yy") ,@(cdr default3)))
("kyoiku"
("ニュース" nil "http://www.yomiuri.co.jp/kyoiku/news/"
,(format (car default4) "kyoiku/news") ,@(cdr default4))
("教育行政" nil "http://www.yomiuri.co.jp/kyoiku/news2/06.htm"
,(format (car default4) "kyoiku/news2") ,@(cdr default4))
("ボランティア・その他" nil
"http://www.yomiuri.co.jp/kyoiku/news2/08.htm"
,(format (car default4) "kyoiku/news2") ,@(cdr default4)))
("kyoiku.children"
("Jキッズ通信" "jkids"
"http://www.yomiuri.co.jp/kyoiku/children/jkids/"
,(format (car default4) "kyoiku/children/jkids") ,@(cdr default4))
("子どもの心" "kodomo.hagukumu"
"http://www.yomiuri.co.jp/kyoiku/hagukumu/kodomo/"
,(format (car default4) "kyoiku/hagukumu/kodomo") ,@(cdr default4))
("ニュースウィークリー" "weekly"
"http://www.yomiuri.co.jp/kyoiku/children/weekly/"
,(format (car default4) "kyoiku/children/weekly") ,@(cdr default4)))
("kyoiku.qanda"
("教育相談" "consul" "http://www.yomiuri.co.jp/kyoiku/qanda/consul/"
,(format (car default4) "kyoiku/qanda/consul") ,@(cdr default4))
("悩みのち晴れ" "worries"
"http://www.yomiuri.co.jp/kyoiku/qanda/worries/"
,(format (car default4) "kyoiku/qanda/worries") ,@(cdr default4)))
("kyoiku.special"
("受験ABC" "s06" "http://www.yomiuri.co.jp/kyoiku/special/s06/"
,(format (car default4) "kyoiku/special/s06") ,@(cdr default4)))
("national"
("文化" "culture" "http://www.yomiuri.co.jp/national/culture/"
,(format (car default) "national/culture/news") ,@(cdr default))
("おくやみ" "obit" "http://www.yomiuri.co.jp/national/obit/"
,(format (car default2) "national/obit/news") ,@(cdr default2)))
("sports"
("エトセトラ" "etc" "http://www.yomiuri.co.jp/sports/etc/"
,(format (car default) "sports/etc/news") ,@(cdr default))
("ゴルフ" "golf" "http://www.yomiuri.co.jp/sports/golf/"
,(format (car default) "sports/golf/news") ,@(cdr default))
("大リーグ" "mlb" "http://www.yomiuri.co.jp/sports/mlb/"
,(format (car default) "sports/mlb/news") ,@(cdr default))
("プロ野球" "npb" "http://www.yomiuri.co.jp/sports/npb/"
,(format (car default) "sports/npb/news") ,@(cdr default))
("サッカー" "soccer" "http://www.yomiuri.co.jp/sports/soccer/"
,(format (car default) "sports/soccer/news") ,@(cdr default))
("東京六大学野球07" "ubb07" "http://www.yomiuri.co.jp/sports/ubb07/"
,(format (car default) "sports/ubb07/news") ,@(cdr default)))))
"Alist of parent groups and lists of subgenres and tables for subgroups.
Each table is the same as the `cdr' of the element of
`shimbun-yomiuri-group-table'.")
;; Regexp made of four alternatives joined by "\\|".  Each backslash at the
;; end of a line is a backslash-newline, which the Lisp reader drops, so the
;; source line breaks are not part of the value.
;; NOTE(review): presumably marks where article content begins -- the
;; consumer is not visible here.  All four alternatives read as "\n\n",
;; which makes the alternation redundant; the distinguishing marker text
;; appears to be missing -- TODO confirm against upstream.
(defvar shimbun-yomiuri-content-start
"\n\n\
\\|\n\n\
\\|\n\n\
\\|\n\n")
;; Regexp made of three alternatives joined by "\\|"; the trailing
;; backslashes are backslash-newlines that the Lisp reader drops.  Its value
;; is also used to initialize `shimbun-yomiuri-text-content-end'.
;; NOTE(review): presumably marks where article content ends -- the consumer
;; is not visible here.  All three alternatives read as "\n\n"; the
;; distinguishing marker text appears to be missing -- TODO confirm against
;; upstream.
(defvar shimbun-yomiuri-content-end
"\n\n\
\\|\n\n\
\\|\n\n")
;; Regexp made of three alternatives joined by "\\|"; the trailing
;; backslashes are backslash-newlines that the Lisp reader drops.
;; NOTE(review): presumably the content-start marker used for text/plain
;; output -- the consumer is not visible here.  All three alternatives read
;; as "\n\n"; the distinguishing marker text appears to be missing -- TODO
;; confirm against upstream.
(defvar shimbun-yomiuri-text-content-start
"\n\n\
\\|\n\n\
\\|\n\n")
;; Initialized from the value `shimbun-yomiuri-content-end' has when this
;; form is evaluated; later changes to that variable are not tracked.
(defvar shimbun-yomiuri-text-content-end shimbun-yomiuri-content-end)
(defvar shimbun-yomiuri-x-face-alist
'(("default" . "X-Face: #sUhc'&(fVr$~
-+>[\t\n ]*"
nil t)
(delete-region (match-beginning 0) (match-end 0)))
;; Remove nav and track-back button, etc.
(shimbun-remove-tags
"[\t\n ]*"
"
[\t\n ]*"))))))
;; `:before' qualifier for `shimbun-make-contents' on `shimbun-yomiuri'
;; objects: passes SHIMBUN and HEADER to `shimbun-yomiuri-prepare-article'.
(luna-define-method shimbun-make-contents :before ((shimbun shimbun-yomiuri)
header)
(shimbun-yomiuri-prepare-article shimbun header))
;; `:around' qualifier for `shimbun-clear-contents' on `shimbun-yomiuri'
;; objects.  Runs the next method first; if that returns nil, so does this.
;; Otherwise it calls `shimbun-break-long-japanese-lines' unless
;; `shimbun-prefer-text-plain-internal' returns non-nil for SHIMBUN, and
;; then returns t.
(luna-define-method shimbun-clear-contents :around
  ((shimbun shimbun-yomiuri) header)
  (and (luna-call-next-method)
       (progn
         ;; `or' evaluates the line breaker only when the preference
         ;; check yields nil.
         (or (shimbun-prefer-text-plain-internal shimbun)
             (shimbun-break-long-japanese-lines))
         t)))
(provide 'sb-yomiuri)
;;; sb-yomiuri.el ends here