;;; shimbun.el --- interfacing with web newspapers -*- coding: iso-2022-7bit; -*- ;; Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 ;; Yuuichi Teranishi ;; Author: TSUCHIYA Masatoshi , ;; Akihiro Arisawa , ;; Yuuichi Teranishi , ;; Katsumi Yamaoka ;; Keywords: news ;; This file is the main part of shimbun. ;; This program is free software; you can redistribute it and/or modify ;; it under the terms of the GNU General Public License as published by ;; the Free Software Foundation; either version 2, or (at your option) ;; any later version. ;; This program is distributed in the hope that it will be useful, ;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;; GNU General Public License for more details. ;; You should have received a copy of the GNU General Public License ;; along with this program; see the file COPYING. If not, write to ;; the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ;; Boston, MA 02110-1301, USA. ;;; Commentary: ;; Original code was nnshimbun.el written by ;; TSUCHIYA Masatoshi . ;; Shimbun API: ;; ;; shimbun-open ;; shimbun-server ;; shimbun-groups ;; shimbun-current-group ;; shimbun-open-group ;; shimbun-close-group ;; shimbun-headers ;; shimbun-reply-to ;; shimbun-x-face ;; shimbun-header-insert ;; shimbun-search-id ;; shimbun-article-expiration-days ;; shimbun-article ;; shimbun-close ;; Shimbun Header API: ;; ;; shimbun-header-subject ;; shimbun-header-set-subject ;; shimbun-header-from ;; shimbun-header-set-from ;; shimbun-header-date ;; shimbun-header-set-date ;; shimbun-header-id ;; shimbun-header-set-id ;; shimbun-header-references ;; shimbun-header-set-references ;; shimbun-header-chars ;; shimbun-header-set-chars ;; shimbun-header-lines ;; shimbun-header-set-lines ;; shimbun-header-xref ;; shimbun-header-set-xref ;; shimbun-header-extra ;; shimbun-header-set-extra ;;; Code: (eval-when-compile (require 'cl)) (eval-when-compile (require 'static)) (require 'mcharset) (require 'eword-encode) (require 'luna) (require 'std11) (require 'w3m) (eval-and-compile (luna-define-class shimbun () (mua server current-group groups x-face x-face-alist url coding-system from-address content-start content-end expiration-days server-name ;; Say whether to prefer text/plain articles. prefer-text-plain ;; Similar to `content-start' and `content-end' ;; but are used to extract html contents for ;; text/plain articles. text-content-start text-content-end ;; Say whether to convert Japanese zenkaku ;; ASCII chars into hankaku. japanese-hankaku ;; Coding system for encoding URIs when ;; accessing the server. url-coding-system ;; Number of times to retry fetching contents. retry-fetching)) (luna-define-internal-accessors 'shimbun)) (defgroup shimbun nil "shimbun - the backend library to read web newspapers." :group 'w3m :group 'hypermedia) (defcustom shimbun-x-face "X-Face: @Q+y!*#)K`rvKfSQnCK.Q\\{T0qs@?plqxVu<=@H-y\ 22NlKSprlFiND7{\"{]&Ddg1=P6{Ze|\n xbW2L1p5ofS\\&u~28A\ dJrT4Cd retry 0) (not retval)) (delete-region (point-min) (point-max)) (setq retval (shimbun-retrieve-url url no-cache no-decode referer coding) retry (1- retry))) (shimbun-message shimbun "shimbun: Retrying to fetch contents...%s" (if retval "done" "failed")) retval)))))) (defun shimbun-real-url (url &optional no-cache) "Return a real URL." (w3m-real-url url no-cache)) (defalias 'shimbun-decode-anchor-string 'w3m-decode-anchor-string) (defalias 'shimbun-decode-entities 'w3m-decode-entities) (defalias 'shimbun-decode-entities-string 'w3m-decode-entities-string) (defalias 'shimbun-expand-url 'w3m-expand-url) (defalias 'shimbun-find-coding-system 'w3m-find-coding-system) (defalias 'shimbun-replace-in-string 'w3m-replace-in-string) (defalias 'shimbun-url-encode-string 'w3m-url-encode-string) ;;; Implementation of Header API. (eval-and-compile (luna-define-class shimbun-header () (number subject from date id references chars lines xref extra)) (luna-define-internal-accessors 'shimbun-header)) (defun shimbun-header-number (header) (shimbun-header-number-internal header)) (defun shimbun-header-set-number (header number) (shimbun-header-set-number-internal header number)) (defun shimbun-header-subject (header &optional no-encode) (if no-encode (shimbun-header-subject-internal header) (shimbun-mime-encode-string (shimbun-header-subject-internal header)))) (defun shimbun-header-normalize (string &optional keep-angle-brackets) (when string (save-match-data ;; This is a trick to keep backward compatibility for ;; `shimbun-header-set-subject' and `shimbun-header-set-from'. (if (string-match eword-encoded-word-regexp string) (eword-decode-string string) (with-temp-buffer (insert string) (unless keep-angle-brackets (shimbun-remove-markup)) (shimbun-decode-entities) (subst-char-in-region (point-min) (point-max) ?\t ?\ t) (subst-char-in-region (point-min) (point-max) ?\r ?\ t) (subst-char-in-region (point-min) (point-max) ?\f ?\ t) (subst-char-in-region (point-min) (point-max) ?\n ?\ t) (goto-char (point-min)) (skip-chars-forward " ") (buffer-substring (point) (progn (goto-char (point-max)) (skip-chars-backward " ") (point)))))))) (defun shimbun-header-set-subject (header subject &optional asis) (shimbun-header-set-subject-internal header (if asis subject (shimbun-header-normalize subject)))) (defun shimbun-header-from (header &optional no-encode) (if no-encode (shimbun-header-from-internal header) (shimbun-mime-encode-string (shimbun-header-from-internal header)))) (defun shimbun-header-set-from (header from &optional asis) (shimbun-header-set-from-internal header (if asis from (shimbun-header-normalize from t)))) (defun shimbun-header-date (header) (shimbun-header-date-internal header)) (defun shimbun-header-set-date (header date &optional asis) (shimbun-header-set-date-internal header (if asis date (shimbun-header-normalize date)))) (defun shimbun-header-id (header) (shimbun-header-id-internal header)) (defun shimbun-header-set-id (header id &optional asis) (shimbun-header-set-id-internal header (if asis id (shimbun-header-normalize id t)))) (defun shimbun-header-references (header) (shimbun-header-references-internal header)) (defun shimbun-header-set-references (header references &optional asis) (shimbun-header-set-references-internal header (if asis references (shimbun-header-normalize references t)))) (defun shimbun-header-chars (header) (shimbun-header-chars-internal header)) (defun shimbun-header-set-chars (header chars) (shimbun-header-set-chars-internal header chars)) (defun shimbun-header-lines (header) (shimbun-header-lines-internal header)) (defun shimbun-header-set-lines (header lines) (shimbun-header-set-lines-internal header lines)) (defun shimbun-header-xref (header) (shimbun-header-xref-internal header)) (defun shimbun-header-set-xref (header xref) (shimbun-header-set-xref-internal header xref)) (defun shimbun-header-extra (header) (shimbun-header-extra-internal header)) (defun shimbun-header-set-extra (header extra) (shimbun-header-set-extra-internal header extra)) (defun shimbun-create-header (&optional number subject from date id references chars lines xref extra asis) "Return a new header for a shimbun article. Because `shimbun-create-header' normalizes arguments with `shimbun-header-normalize' before creating new header object, following operations are unnecessary: * MIME-encoding of subjects and from addresses. * Removal of HTML tags. * Decode of HTML entities. * Replacement of space characteres, such as tab, newline, and linefeed. If optional 11th argument ASIS is non-nil, normalization of header values is suppressed." (let ((new (luna-make-entity 'shimbun-header :number number))) (shimbun-header-set-subject new subject asis) (shimbun-header-set-from new from asis) (shimbun-header-set-date new date asis) (shimbun-header-set-id new id asis) (shimbun-header-set-references new references asis) (shimbun-header-set-chars new chars) (shimbun-header-set-lines new lines) (shimbun-header-set-xref new xref) (shimbun-header-set-extra new extra) new)) (defun shimbun-make-header (&optional number subject from date id references chars lines xref extra) "Return a new header for a shimbun article. This function is obsolete. You should use `shimbun-create-header' instead of this function." (shimbun-create-header number (and subject (eword-decode-string subject)) (and from (eword-decode-string from)) date id references chars lines xref extra t)) ;; Functions for the internal use. (defun shimbun-article-base-url (shimbun header) "Return URL which points the original page specified by HEADER for SHIMBUN." (let ((xref (shimbun-header-xref header))) (if (and xref (eq (aref xref 0) ?/)) (concat (shimbun-url-internal shimbun) xref) xref))) (luna-define-generic shimbun-article-url (shimbun header) "Return URL which points the printable page specified by HEADER for SHIMBUN.") (luna-define-method shimbun-article-url ((shimbun shimbun) header) (shimbun-article-base-url shimbun header)) (defcustom shimbun-encapsulate-images t "*If non-nil, inline images will be encapsulated in the articles. Generated article have a multipart/related content-type." :group 'shimbun :type 'boolean) ;;; Base class for shimbun message entities: (eval-and-compile (luna-define-class shimbun-entity () (type cid data)) (luna-define-internal-accessors 'shimbun-entity)) (luna-define-generic shimbun-entity-type (entity) "Return TYPE of ENTITY.") (luna-define-method shimbun-entity-type ((entity shimbun-entity)) (shimbun-entity-type-internal entity)) (luna-define-generic shimbun-entity-cid (entity) "Return CID of ENTITY.") (luna-define-method shimbun-entity-cid ((entity shimbun-entity)) (shimbun-entity-cid-internal entity)) (defun shimbun-entity-set-cid (entity cid) (shimbun-entity-set-cid-internal entity cid)) (defsetf shimbun-entity-cid shimbun-entity-set-cid) (luna-define-generic shimbun-entity-insert (entity) "Insert ENTITY as a MIME part.") (luna-define-method shimbun-entity-insert ((entity shimbun-entity)) (insert "Content-Type: " (shimbun-entity-type entity) "\n") (when (shimbun-entity-cid entity) (insert "Content-ID: <" (shimbun-entity-cid entity) ">\n")) (insert "\n")) ;;; Class for multipart entities: (eval-and-compile (luna-define-class shimbun-multipart-entity (shimbun-entity) (boundary)) (luna-define-internal-accessors 'shimbun-multipart-entity)) (defvar shimbun-multipart-entity-counter 0) (luna-define-method initialize-instance :before ((entity shimbun-multipart-entity) &rest init-args) (shimbun-multipart-entity-set-boundary-internal entity (apply 'format "===shimbun_%d_%d_%d_%d===" (incf shimbun-multipart-entity-counter) (current-time)))) (defun shimbun-make-multipart-entity (&optional type cid) (luna-make-entity 'shimbun-multipart-entity :type type :cid cid)) (luna-define-method shimbun-entity-type ((entity shimbun-multipart-entity)) (concat (or (shimbun-entity-type-internal entity) (shimbun-entity-set-type-internal entity (catch 'type (dolist (child (shimbun-entity-data-internal entity)) (unless (string-match "\\`text/" (shimbun-entity-type child)) (throw 'type "multipart/related"))) "multipart/mixed"))) "; boundary=\"" (shimbun-multipart-entity-boundary-internal entity) "\"" (when (string= "multipart/related" (shimbun-entity-type-internal entity)) (catch 'start (dolist (child (shimbun-entity-data-internal entity)) (when (string-match "\\`\\(?:text/\\|multipart/mixed\\)" (shimbun-entity-type child)) (throw 'start (concat "; type=\"" (shimbun-entity-type-internal child) "\"; start=\"<" (shimbun-entity-cid child) ">\"")))))))) (luna-define-method shimbun-entity-insert :after ((entity shimbun-multipart-entity)) (let ((boundary (shimbun-multipart-entity-boundary-internal entity))) (dolist (child (shimbun-entity-data-internal entity)) (insert "--" boundary "\n") (shimbun-entity-insert child) (insert (if (bolp) "\n" "\n\n"))) (insert "--" boundary "--\n"))) (defun shimbun-entity-add-child (entity &rest children) (shimbun-entity-set-data-internal entity (nconc (shimbun-entity-data-internal entity) children))) ;;; Class of text entities: (eval-and-compile (luna-define-class shimbun-text-entity (shimbun-entity) (charset)) (luna-define-internal-accessors 'shimbun-text-entity)) (defun shimbun-make-text-entity (type data &optional cid) (luna-make-entity 'shimbun-text-entity :type type :data data :cid cid)) (luna-define-generic shimbun-text-entity-charset (entity &optional begin end) "Return MIME charset of ENTITY.") (luna-define-method shimbun-text-entity-charset ((entity shimbun-text-entity) &optional begin end) (let (cur tmp) (unless (and begin end) (setq cur (current-buffer)) (set-buffer (generate-new-buffer " *temp*")) (insert (shimbun-entity-data-internal entity)) (setq begin (point-min) end (point-max) tmp (current-buffer))) (prog1 ;; Prefer meta charset. (or (let ((charset (progn (goto-char begin) (and (re-search-forward "\ ]*>" end t) (match-string 1))))) (when (and charset (w3m-find-coding-system (intern (downcase charset)))) (shimbun-text-entity-set-charset-internal entity (upcase charset)))) (shimbun-text-entity-charset-internal entity) (shimbun-text-entity-set-charset-internal entity (upcase (symbol-name (detect-mime-charset-region begin end))))) (when cur (set-buffer cur) (kill-buffer tmp))))) (luna-define-method shimbun-entity-type ((entity shimbun-text-entity)) (concat (shimbun-entity-type-internal entity) "; charset=" (shimbun-text-entity-charset entity))) (luna-define-method shimbun-entity-insert :around ((entity shimbun-text-entity)) (save-restriction (narrow-to-region (point) (point)) (insert (shimbun-entity-data-internal entity)) (encode-coding-region (point-min) (point-max) (mime-charset-to-coding-system (shimbun-text-entity-charset entity (point-min) (point-max)))) (goto-char (point-min)) (luna-call-next-method) (goto-char (point-max)))) ;;; Class for image entities: (eval-and-compile (luna-define-class shimbun-image-entity (shimbun-entity) (disposition)) (luna-define-internal-accessors 'shimbun-image-entity)) (luna-define-method initialize-instance :before ((entity shimbun-image-entity) &rest init-args) (shimbun-image-entity-set-disposition-internal entity "inline")) (defun shimbun-make-image-entity (type data &optional cid) (luna-make-entity 'shimbun-image-entity :type type :data data :cid cid)) (luna-define-method shimbun-entity-insert :around ((entity shimbun-image-entity)) (insert "Content-Transfer-Encoding: base64\n" "Content-Disposition: " (shimbun-image-entity-disposition-internal entity) "\n") (luna-call-next-method) (insert (base64-encode-string (shimbun-entity-data-internal entity)))) (defun shimbun-mime-replace-image-tags (shimbun base-cid &optional base-url images) "Replace all IMG tags with references to inlined image parts. This function takes a BASE-CID as a base string for CIDs of inlined image parts, and returns an alist of URLs and image entities." (goto-char (point-min)) (let ((case-fold-search t) start end url img type) (while (and (re-search-forward "\\(<[\t\n\f\r ]*img\\)[\t\n\f\r ]" nil t) (progn (setq start (match-end 1)) (search-forward ">" nil 'move)) (progn (setq end (match-beginning 0)) (goto-char start) (re-search-forward (eval-when-compile (let ((spc "\t\n\f\r ")) (concat "[" spc "]+" ;; 1. replaceable part "\\(src[" spc "]*=[" spc "]*" "\\(?:\"" ;; 2. url quoted with \" "\\([^\"]+\\)" "\"\\|'" ;; 3. url quoted with ' "\\([^']+\\)" "'\\|" ;; 4. url unquoted "\\([^" spc "\"']+\\)" "\\)\\)"))) end t))) (setq start (match-beginning 1) end (match-end 1) url (or (match-string 2) (match-string 3) (match-string 4))) (setq url (shimbun-expand-url ;; See the FIXME comment within the function definition ;; of `w3m-decode-anchor-string' in w3m.el. (shimbun-decode-anchor-string (if (string-match "[\t\n\f\r ]+\\'" url) (substring url 0 (match-beginning 0)) url)) base-url)) (unless (or (setq img (assoc url images)) (and w3m-ignored-image-url-regexp (string-match w3m-ignored-image-url-regexp url))) (with-temp-buffer (set-buffer-multibyte nil) (setq case-fold-search nil type (shimbun-fetch-url shimbun url nil t base-url)) (goto-char (point-min)) (when (or (and (looking-at "\\(GIF8\\)\\|\\(\377\330\\)\\|\211PNG\r\n") (setq type (concat "image/" (cond ((match-beginning 1) "gif") ((match-beginning 2) "jpeg") (t "png"))))) (and type (string-match "\\`image/" type))) (push (setq img (cons url (shimbun-make-image-entity type (buffer-string) (format "shimbun.inline.%d.%s" (length images) base-cid)))) images)))) (when img ;; Only when an image is successfully retrieved, its ;; source URI should be rewritten. (goto-char start) (delete-region start end) (insert "src=\"cid:" (shimbun-entity-cid (cdr img)) "\"")))) images) (defun shimbun-make-mime-article (shimbun header &optional base-url) "Make a MIME article according to SHIMBUN and HEADER. If article have inline images, generated article have a multipart/related content-type if `shimbun-encapsulate-images' is non-nil." (let ((base-cid (shimbun-header-id header)) images) (when (string-match "\\`<\\([^>]+\\)>\\'" base-cid) (setq base-cid (match-string 1 base-cid))) (when shimbun-encapsulate-images (setq images (shimbun-mime-replace-image-tags shimbun base-cid (or base-url (shimbun-article-url shimbun header))))) (let ((body (shimbun-make-text-entity "text/html" (buffer-string)))) (erase-buffer) (when images (let ((new (shimbun-make-multipart-entity))) (setf (shimbun-entity-cid body) (concat "shimbun.0." base-cid)) (shimbun-entity-add-child new body) (apply 'shimbun-entity-add-child new (mapcar 'cdr (nreverse images))) (setq body new))) (shimbun-header-insert shimbun header) (insert "MIME-Version: 1.0\n") (shimbun-entity-insert body)))) (defcustom shimbun-x-face-database-function (if (boundp 'shimbun-use-bbdb-for-x-face) (cdr (assq (symbol-value 'shimbun-use-bbdb-for-x-face) '((t . shimbun-bbdb-get-x-face) (never . never))))) "*Function to get faces from a favorite database. When its initial value is nil and BBDB or LSDB is loaded, it will be set to an appropriate default value. You can set this to `never' if you want to use no database." :group 'shimbun :type '(radio (const :tag "Default" nil) (const :tag "Use no database" never) (const :tag "Use BBDB" shimbun-bbdb-get-x-face) (const :tag "Use LSDB" shimbun-lsdb-get-x-face) (function :format "User defined function: %v\n" :size 0))) (defun shimbun-header-insert (shimbun header) (let ((from (shimbun-header-from header)) (refs (shimbun-header-references header)) (reply-to (shimbun-reply-to shimbun)) x-face) (insert (with-temp-buffer ;; Make sure the temp buffer's multibyteness is true. It is ;; needed to make `encode-mime-charset-string' (which is ;; employed by `eword-encode-string') encode non-ascii text. (static-unless (featurep 'xemacs) (set-buffer-multibyte t)) (insert "Subject: " (or (eword-encode-string (shimbun-header-subject header t)) "(none)") "\nFrom: " (or (eword-encode-string (shimbun-header-from header t)) "(nobody)") "\nDate: " (or (shimbun-header-date header) "") "\nMessage-ID: " (shimbun-header-id header) "\n") (when reply-to (insert "Reply-To: " reply-to "\n")) (when (and refs (string< "" refs)) (insert "References: " refs "\n")) (insert "Lines: " (number-to-string (or (shimbun-header-lines header) 0)) "\n" "Xref: " (or (shimbun-article-base-url shimbun header) "") "\n") (unless shimbun-x-face-database-function (when (and (fboundp 'bbdb-get-field) (not (eq 'autoload (car-safe (symbol-function 'bbdb-get-field)))) (boundp 'bbdb-file) (stringp (symbol-value 'bbdb-file)) (file-exists-p (symbol-value 'bbdb-file))) (setq shimbun-x-face-database-function 'shimbun-bbdb-get-x-face))) (unless shimbun-x-face-database-function (when (and (fboundp 'lsdb-lookup-records) (not (eq 'autoload (car-safe (symbol-function 'lsdb-lookup-records))))) (setq shimbun-x-face-database-function 'shimbun-lsdb-get-x-face))) (when (setq x-face (or (and from (functionp shimbun-x-face-database-function) (funcall shimbun-x-face-database-function from)) (shimbun-x-face shimbun))) (insert x-face) (unless (bolp) (insert "\n"))) (buffer-string))))) (eval-when-compile ;; Attempt to pick up the inline function `bbdb-search-simple'. (condition-case nil (require 'bbdb) (error (autoload 'bbdb-search-simple "bbdb") (autoload 'bbdb-get-field "bbdb")))) (defun shimbun-bbdb-get-x-face (person) "Search a face of a PERSON from BBDB. When missing it, return nil." (let (x) (and (setq x (cadr (mail-extract-address-components person))) (setq x (bbdb-search-simple nil x)) (setq x (bbdb-get-field x 'face)) (not (zerop (length x))) (concat "X-Face: " (mapconcat 'identity (split-string x) "\nX-Face: "))))) (eval-when-compile (condition-case nil (require 'lsdb) (error (autoload 'lsdb-maybe-load-hash-tables "lsdb") (autoload 'lsdb-lookup-records "lsdb")))) (defun shimbun-lsdb-get-x-face (person) "Return a face of a PERSON from LSDB. When missing it, return nil." (lsdb-maybe-load-hash-tables) (let (x) (and (setq x (car (mail-extract-address-components person))) (setq x (car (lsdb-lookup-records x))) (setq x (cdr (assq 'x-face x))) (not (zerop (length x))) (mapconcat (lambda (x-face) (concat "X-Face: " (mapconcat 'identity (split-string x-face) "\n "))) x "\n")))) ;;; Implementation of Shimbun API. (defconst shimbun-attributes '(url groups coding-system server-name from-address content-start content-end x-face-alist expiration-days prefer-text-plain text-content-start text-content-end japanese-hankaku url-coding-system retry-fetching)) (defun shimbun-open (server &optional mua) "Open a shimbun for SERVER. Optional MUA is a `shimbun-mua' instance." (let ((load-path (append shimbun-server-additional-path load-path)) rest subst shimbun) (require (intern (concat "sb-" server))) (setq shimbun (apply 'luna-make-entity (intern (concat "shimbun-" server)) :mua mua :server server (dolist (attr shimbun-attributes (nreverse rest)) (push (intern (format ":%s" attr)) rest) (push (if (setq subst (assq attr '((content-start . text-content-start) (content-end . text-content-end) (text-content-start . content-start) (text-content-end . content-end)))) (or (symbol-value (intern-soft (format "shimbun-%s-%s" server attr))) (symbol-value (intern-soft (format "shimbun-%s-%s" server (cdr subst))))) (symbol-value (intern-soft (format "shimbun-%s-%s" server attr)))) rest)))) (when mua (shimbun-mua-set-shimbun-internal mua shimbun)) shimbun)) (defun shimbun-server (shimbun) "Return the server name of SHIMBUN." (shimbun-server-internal shimbun)) (luna-define-generic shimbun-server-name (shimbun) "Return the server name of SHIMBUN in human-readable style.") (luna-define-method shimbun-server-name ((shimbun shimbun)) (or (shimbun-server-name-internal shimbun) (shimbun-server-internal shimbun))) (luna-define-generic shimbun-groups (shimbun) "Return a list of groups which are available in the SHIMBUN.") (luna-define-method shimbun-groups ((shimbun shimbun)) (shimbun-groups-internal shimbun)) (luna-define-generic shimbun-group-p (shimbun group) "Return non-nil if group is available in the SHIMBUN.") (luna-define-method shimbun-group-p ((shimbun shimbun) group) (member group (shimbun-groups shimbun))) (defun shimbun-current-group (shimbun) "Return the current group of SHIMBUN." (shimbun-current-group-internal shimbun)) (luna-define-generic shimbun-current-group-name (shimbun) "Return the current group name of SHIMBUN in human-readable style.") (luna-define-method shimbun-current-group-name ((shimbun shimbun)) (shimbun-current-group-internal shimbun)) (defun shimbun-open-group (shimbun group) "Open a SHIMBUN GROUP." (if (shimbun-group-p shimbun group) (shimbun-set-current-group-internal shimbun group) (error "No such group %s" group))) (defun shimbun-close-group (shimbun) "Close opened group of SHIMBUN." (when (shimbun-current-group-internal shimbun) (shimbun-set-current-group-internal shimbun nil))) (luna-define-generic shimbun-japanese-hankaku (shimbun) "Say whether to convert Japanese zenkaku ASCII chars into hankaku.") (luna-define-method shimbun-japanese-hankaku ((shimbun shimbun)) (let ((hankaku (or (shimbun-japanese-hankaku-internal shimbun) shimbun-japanese-hankaku))) (unless (eq hankaku 'never) hankaku))) (luna-define-generic shimbun-headers (shimbun &optional range) "Return a SHIMBUN header list. Optional argument RANGE is one of following: nil or `all': Retrieve all header indices. `last': Retrieve the last header index. integer n: Retrieve n pages of header indices.") (defmacro shimbun-header-index-pages (range) "Return number of pages to retrieve according to RANGE. Return nil if all pages should be retrieved." (if (consp range) `(let ((range ,range)) (if (eq 'last range) 1 (if (eq 'all range) nil range))) `(if (eq 'last ,range) 1 (if (eq 'all ,range) nil ,range)))) (defvar shimbun-use-refresh t "Non-nil means honor the REFRESH attribute in META tags. Bind it to nil per shimbun if the refresh brings unwanted page.") ;; FIXME: It seems better that `shimbun-fetch-url' provides the redirection ;; support, whereas it is currently done by `shimbun-headers-1' for headers ;; and `shimbun-article-1' for articles separately. The reason doing so is ;; that `shimbun-article-1' needs to replace urls in XREFs with the redirected ;; ones. (defun shimbun-headers-1 (shimbun url) "Run `shimbun-fetch-url' and refresh the contents if necessary." (let (;; The default url used when it is not specified for refresh. (w3m-current-url url) (w3m-use-refresh shimbun-use-refresh)) (when (and (shimbun-fetch-url shimbun url 'reload) (progn (w3m-check-refresh-attribute) (and w3m-current-refresh (not (string-equal url (cdr w3m-current-refresh)))))) (erase-buffer) (shimbun-headers-1 shimbun (cdr w3m-current-refresh))))) (luna-define-method shimbun-headers ((shimbun shimbun) &optional range) (shimbun-message shimbun (concat shimbun-checking-new-news-format "...")) (prog1 (with-temp-buffer (let ((w3m-verbose (if shimbun-verbose nil w3m-verbose)) headers) (shimbun-headers-1 shimbun (shimbun-index-url shimbun)) (setq headers (shimbun-get-headers shimbun range)) (if (memq (shimbun-japanese-hankaku shimbun) '(body nil)) headers (dolist (header headers headers) (erase-buffer) (insert (shimbun-header-subject-internal header)) (shimbun-japanese-hankaku-buffer) (shimbun-header-set-subject-internal header (buffer-string)))))) (shimbun-message shimbun (concat shimbun-checking-new-news-format "...done")))) (luna-define-generic shimbun-reply-to (shimbun) "Return a reply-to field body for SHIMBUN.") (luna-define-method shimbun-reply-to ((shimbun shimbun)) nil) (luna-define-generic shimbun-x-face (shimbun) "Return a X-Face field string for SHIMBUN.") (luna-define-method shimbun-x-face ((shimbun shimbun)) (shimbun-set-x-face-internal shimbun (let ((group (shimbun-current-group-internal shimbun)) (alist (shimbun-x-face-alist-internal shimbun))) (or (cdr (assoc group alist)) (catch 'face (dolist (elem alist) (when (and (string-match "[]$*+\\^[]" (car elem)) (string-match (car elem) group)) (throw 'face (cdr elem))))) (cdr (assoc "default" alist)) shimbun-x-face)))) (defun shimbun-search-id (shimbun id) "Return non-nil when MUA found a message structure which corresponds to ID." (when (shimbun-mua-internal shimbun) (shimbun-mua-search-id (shimbun-mua-internal shimbun) id))) (defun shimbun-article-expiration-days (shimbun) "Return an expiration day number of SHIMBUN. Return nil when articles are not expired." (shimbun-expiration-days-internal shimbun)) (defun shimbun-content-start (shimbun) "Return the `content-start' value according to SHIMBUN." (if (shimbun-prefer-text-plain-internal shimbun) (or (shimbun-text-content-start-internal shimbun) (shimbun-content-start-internal shimbun)) (or (shimbun-content-start-internal shimbun) (shimbun-text-content-start-internal shimbun)))) (defun shimbun-content-end (shimbun) "Return the `content-end' value according to SHIMBUN." (if (shimbun-prefer-text-plain-internal shimbun) (or (shimbun-text-content-end-internal shimbun) (shimbun-content-end-internal shimbun)) (or (shimbun-content-end-internal shimbun) (shimbun-text-content-end-internal shimbun)))) (luna-define-generic shimbun-from-address (shimbun) "Make a From address like \"SERVER (GROUP)
\".") (luna-define-method shimbun-from-address ((shimbun shimbun)) (let ((addr (or (shimbun-from-address-internal shimbun) (shimbun-reply-to shimbun)))) (if addr (format "%s (%s) <%s>" (shimbun-server-name shimbun) (shimbun-current-group-name shimbun) addr) (format "%s (%s)" (shimbun-server-name shimbun) (shimbun-current-group-name shimbun))))) (luna-define-generic shimbun-article (shimbun header &optional outbuf) "Retrieve a SHIMBUN article which corresponds to HEADER to the OUTBUF. HEADER is a shimbun-header which is obtained by `shimbun-headers'. If OUTBUF is not specified, article is retrieved to the current buffer.") ;; See also the FIXME comment around `shimbun-headers-1'. (defun shimbun-article-1 (shimbun header) "Run `shimbun-fetch-url' and refresh the contents if necessary." (let* ((url (shimbun-article-url shimbun header)) ;; The default url used when it is not specified for refresh. (w3m-current-url url) (w3m-use-refresh shimbun-use-refresh) real) (when (shimbun-fetch-url shimbun url nil nil (shimbun-article-base-url shimbun header)) (w3m-check-refresh-attribute) (if (and w3m-current-refresh ;; The page may specify to refresh itself. (not (string-equal url (cdr w3m-current-refresh)))) (progn (shimbun-header-set-xref header (cdr w3m-current-refresh)) (erase-buffer) (shimbun-article-1 shimbun header)) (unless (string-equal url (setq real (w3m-real-url url))) (shimbun-header-set-xref header real)))))) (luna-define-method shimbun-article ((shimbun shimbun) header &optional outbuf) (when (shimbun-current-group-internal shimbun) (with-current-buffer (or outbuf (current-buffer)) (w3m-insert-string (or (with-temp-buffer (shimbun-article-1 shimbun header) (shimbun-message shimbun "shimbun: Make contents...") (goto-char (point-min)) (prog1 (shimbun-make-contents shimbun header) (shimbun-message shimbun "shimbun: Make contents...done"))) ""))))) (luna-define-generic shimbun-make-contents (shimbun header) "Return a content string of SHIMBUN article using current buffer content. HEADER is a header structure obtained via `shimbun-headers'.") (defun shimbun-insert-footer (shimbun header &optional html &rest args) "Insert the footer and ARGS." (goto-char (point-min)) (when (re-search-forward "[\t\n ]*\\(?:<\\(?:![^>]+\\|br\\|/?div\\|/?p\\)>[\t\n ]*\\)*\\'" nil 'move) (delete-region (match-beginning 0) (point-max))) (apply 'insert "\n" (shimbun-footer shimbun header html) args)) (defun shimbun-current-base-url () "Process BASE tag in the current buffer." (let ((case-fold-search t)) (goto-char (point-min)) (when (re-search-forward "]*\\)?>" nil t) (save-restriction (narrow-to-region (point-min) (point)) (goto-char (point-min)) (when (re-search-forward "\n\n\n\n\n") (shimbun-insert-footer shimbun header t "\n\n")) (shimbun-make-mime-article shimbun header base-url) (buffer-string))) (eval-and-compile (autoload 'shimbun-make-text-contents "sb-text")) (luna-define-method shimbun-make-contents ((shimbun shimbun) header) (if (shimbun-prefer-text-plain-internal shimbun) (shimbun-make-text-contents shimbun header) (shimbun-make-html-contents shimbun header))) (luna-define-generic shimbun-clear-contents (shimbun header) "Clear a content in this current buffer for an article of SHIMBUN. Return nil, unless a content is cleared successfully.") (luna-define-method shimbun-clear-contents ((shimbun shimbun) header) (let ((start (shimbun-content-start shimbun)) (end (shimbun-content-end shimbun)) (case-fold-search t)) (goto-char (point-min)) (when (and (stringp start) (re-search-forward start nil t) (progn (setq start (point)) (stringp end)) (re-search-forward end nil t)) (delete-region (match-beginning 0) (point-max)) (delete-region (point-min) start) (unless (memq (shimbun-japanese-hankaku shimbun) '(header subject nil)) (shimbun-japanese-hankaku-buffer t) (goto-char (point-min))) t))) (luna-define-generic shimbun-footer (shimbun header &optional html) "Make a footer string for SHIMBUN and HEADER.") (luna-define-method shimbun-footer ((shimbun shimbun) header &optional html) "Return a null string for servers that have no footer." "") (luna-define-generic shimbun-index-url (shimbun) "Return a index URL of SHIMBUN.") (luna-define-method shimbun-index-url ((shimbun shimbun)) (shimbun-url-internal shimbun)) (luna-define-generic shimbun-get-headers (shimbun &optional range) "Return a shimbun header list of SHIMBUN. Optional argument RANGE is one of following: nil or `all': Retrieve all header indices. `last': Retrieve the last header index. integer n: Retrieve n pages of header indices.") (luna-define-generic shimbun-close (shimbun) "Close a SHIMBUN.") (luna-define-method shimbun-close ((shimbun shimbun)) (shimbun-close-group shimbun)) ;;; Virtual class for Newspapers: (luna-define-class shimbun-newspaper () ()) (luna-define-method shimbun-footer ((shimbun shimbun-newspaper) header &optional html) (if html (concat "
\n-- 
\n" (shimbun-server-name shimbun) " holds the copyright of this article.
\n" "The original article is " "here.\n
\n") (concat "-- \n" (shimbun-server-name shimbun) " holds the the copyright of this article.\n" "The original article is in:\n" (shimbun-article-base-url shimbun header) "\n"))) ;;; Virtual class for Japanese Newspapers: (luna-define-class shimbun-japanese-newspaper () ()) (luna-define-method shimbun-footer ((shimbun shimbun-japanese-newspaper) header &optional html) (if html (concat "
\n-- 
\n$B$3$N5-;v$NCx:n8"$O!"(B" (shimbun-server-name shimbun) "$B\n$B86J*$O(B " (shimbun-article-base-url shimbun header) " $B$G8x3+$5$l$F$$$^$9!#(B\n
\n") (concat "-- \n$B$3$N5-;v$NCx:n8"$O!"(B" (shimbun-server-name shimbun) "$B\n\n\n\n\n") (shimbun-insert-footer shimbun header t "\n\n")) (shimbun-insert-footer shimbun header)) (encode-mime-charset-string (buffer-string) charset)) (defun shimbun-mime-encode-string (string) (condition-case nil (save-match-data (with-temp-buffer ;; Make sure the temp buffer's multibyteness is true. It is ;; needed to make `encode-mime-charset-string' (which is ;; employed by `eword-encode-string') encode non-ascii text. (static-unless (featurep 'xemacs) (set-buffer-multibyte t)) (mapconcat #'identity (split-string (or (eword-encode-string (shimbun-decode-entities-string string)) "")) " "))) (error string))) (defun shimbun-make-date-string (year month day &optional time timezone) "Make a date string which will be used as shimbun headers. YEAR is a 4-digit number, and MONTH and DAY are also numbers. TIME is a string in the \"HH:MM\" form, where HH is a number of hours and MM is a number of minutes. It defaults to \"00:00\". TIMEZONE defaults to \"+0900\" by the historical reason. You should specify this if a time is represented based on other than the \"+0900\" zone." (setq year (cond ((< year 69) (+ year 2000)) ((< year 100) (+ year 1900)) ((< year 1000) ; possible 3-digit years. (+ year 1900)) ; why isn't it 1000? (t year))) (let ((cts (current-time-string (encode-time 0 0 0 day month year)))) (format "%s, %02d %s %04d %s %s" (substring cts 0 3) day (substring cts 4 7) year (or time "00:00") (or timezone "+0900")))) (autoload 'timezone-fix-time "timezone") (defun shimbun-time-parse-string (string) "Parse the time-string STRING into the Emacs style (HIGH LOW) time." (let ((x (nreverse (append (timezone-fix-time string nil nil) nil)))) (apply 'encode-time (nconc (cdr x) (list (car x)))))) (defun shimbun-decode-time (&optional specified-time specified-zone) "Decode a time value as (SEC MINUTE HOUR DAY MONTH YEAR DOW DST ZONE). This function behaves as well as `decode-time' if the optional 2nd arg SPECIFIED-ZONE is nil. If it is an integer indicating the number of seconds ahead of UTC (east of Greenwich), the return value expresses the local time of the zone that the value indicates. For instance, the following form returns the present time of Japan, wherever you are. \(shimbun-decode-time nil 32400)" (if specified-zone (let* ((tz (- (car (current-time-zone)) specified-zone)) (ct (or specified-time (current-time))) (ms (car ct)) (ls (- (cadr ct) tz))) (cond ((< ls 0) (setq ls (+ ls 65536) ms (1- ms))) ((>= ls 65536) (setq ls (- ls 65536) ms (1+ ms)))) (nconc (nbutlast (decode-time (list ms ls))) (list specified-zone))) (decode-time specified-time))) (defun shimbun-sort-headers (headers) "Return a list of sorted HEADERS by date in increasing order." (sort headers (lambda (a b) (setq a (shimbun-time-parse-string (shimbun-header-date a)) b (shimbun-time-parse-string (shimbun-header-date b))) (or (< (car a) (car b)) (and (= (car a) (car b)) (< (cadr a) (cadr b))))))) (defun shimbun-remove-tags (begin-tag &optional end-tag) "Remove all occurrences of regions surrounded by BEGIN-TAG and END-TAG." (let ((case-fold-search t)) (goto-char (point-min)) (if end-tag (let (pos) (while (and (re-search-forward begin-tag nil t) (setq pos (match-beginning 0)) (re-search-forward end-tag nil t)) (delete-region pos (point)))) (while (re-search-forward begin-tag nil t) (delete-region (match-beginning 0) (match-end 0)))))) (defun shimbun-end-of-tag (&optional tag include-whitespace) "Move point to the end of tag. Inner nested tags are skipped. If TAG, which is a name of the tag, is given, this function moves point from the open-tag (point should exist in front of or within it initially) to the end-point of the closing-tag . For example, in the following two situations, point moves from the leftmost tag to the end-point of the rightmost tag: ............... ............... If TAG is omitted or nil, this function moves point to the end-point of the tag in which point exists. In this case, point should initially exist within the start position of the tag and the next tag as follows: ^^^^^^^^ If INCLUDE-WHITESPACE is non-nil, include leading and trailing whitespace. Return the end-point and set the match-data #0, #1, #2, and #3 as follows (\"___\" shows whitespace): The case where TAG is spefified: ______...______ 0 1 2 2 1 0 INCLUDE-WHITESPACE=nil 0 1 2 3 3 2 1 0 INCLUDE-WHITESPACE=non-nil The case where TAG is nil: ______ 0 0 INCLUDE-WHITESPACE=nil 0 1 1 0 INCLUDE-WHITESPACE=non-nil" (let ((init (point)) (num 1) (md (match-data)) (case-fold-search t) regexp st1 st2 st3 nd1 nd2 nd3 nd0 st0) (condition-case nil (progn (if tag (progn (setq tag (regexp-quote tag)) (if (looking-at (concat "\ \[\t\n\r ]*\\(<[\t\n\r ]*" tag "\\(?:[\t\n\r ]*\\|[\t\n\r ]+[^>]+\\)>\\)\ \[\t\n\r ]*")) (setq st1 (nth 2 (match-data)) ;; (match-beginning 1) st2 (nth 3 (match-data)) ;; (match-end 1) st3 (nth 1 (match-data))) ;; (match-end 0) (search-backward "<") (if (looking-at (concat "\ \\(<[\t\n\r ]*" tag "\\(?:[\t\n\r ]*\\|[\t\n\r ]+[^>]+\\)>\\)[\t\n\r ]*")) (setq st1 (car (match-data)) ;; (match-beginning 0) st2 (nth 3 (match-data)) ;; (match-end 1)) st3 (nth 1 (match-data))) ;; (match-end 0) (error ""))) (goto-char (1+ st1)) (setq regexp (concat "\ \[\t\n\r ]*\\(<\\(/\\)?" tag "\\(?:[\t\n\r ]*\\|[\t\n\r ]+[^>]+\\)>\\)")) (while (and (> num 0) (re-search-forward regexp)) (setq num (if (match-beginning 2) (1- num) (1+ num)))) (setq nd1 (nth 3 (match-data)) ;; (match-end 1) nd2 (nth 2 (match-data)) ;; (match-beginning 1) nd3 (car (match-data)))) ;; (match-beginning 0) (search-backward "<") (setq st1 (car (match-data))) ;; (match-beginning 0) (goto-char init) (while (and (> num 0) (re-search-forward "\\(>\\)\\|<")) (setq num (if (match-beginning 1) (1- num) (1+ num)))) (setq nd1 (nth 3 (match-data)))) ;; (match-end 1) (if include-whitespace (progn (skip-chars-forward "\t\n\r ") (setq nd0 (point-marker)) (goto-char st1) (skip-chars-backward "\t\n\r ") (setq st0 (point-marker)) (goto-char nd0) (set-match-data (if tag (list st0 nd0 st1 nd1 st2 nd2 st3 nd3) (list st0 nd0 st1 nd1)))) (set-match-data (if tag (list st1 nd1 st2 nd2 st3 nd3) (list st1 nd1)))) (point)) (error (set-match-data md) (goto-char init) nil)))) (defun shimbun-remove-markup () "Remove all HTML markup, leaving just plain text." (save-excursion (goto-char (point-min)) (while (search-forward "" nil t) (point-max)))) (goto-char (point-min)) (while (re-search-forward "<[^>]+>" nil t) (replace-match "" t t)))) (defun shimbun-strip-cr () "Strip ^M from the end of all lines." (goto-char (point-max)) (while (search-backward "\r\n" nil t) (delete-char 1))) (if (fboundp 'subst-char-in-string) (defalias 'shimbun-subst-char-in-string 'subst-char-in-string) (defun shimbun-subst-char-in-string (fromchar tochar string &optional inplace) "Replace characters in STRING from FROMCHAR to TOCHAR. Unless optional argument INPLACE is non-nil, return a new string." (let ((string (if inplace string (copy-sequence string))) (len (length string)) (idx 0)) ;; Replace all occurrences of FROMCHAR with TOCHAR. (while (< idx len) (when (= (aref string idx) fromchar) (aset string idx tochar)) (setq idx (1+ idx))) string))) (defun shimbun-message (shimbun fmt &rest args) "Function equivalent to `message' enabling to handle special formats. SHIMBUN is a shimbun entity object. FMT and ARGS are the same as the arguments of `message'. This function allows the following special format specifiers: #g means print a group name. #s means print a server name. #S means print a human-readable server name. Use ## to put a single # into the output. If `shimbun-verbose' is nil, it will run silently. The `shimbun-message-enable-logging' variable controls whether this function should preserve a message in the *Messages* buffer." (let (specifier) (with-temp-buffer (static-unless (featurep 'xemacs) (set-buffer-multibyte t)) (insert fmt) (goto-char (point-min)) (while (search-forward "#" nil t) (setq specifier (char-after)) (delete-region (1- (point)) (1+ (point))) (cond ((eq specifier ?#) (insert "#")) ((eq specifier ?g) (insert (shimbun-current-group-internal shimbun))) ((eq specifier ?s) (insert (shimbun-server-internal shimbun))) ((eq specifier ?S) (insert (or (shimbun-server-name-internal shimbun) (shimbun-server-internal shimbun)))))) (setq fmt (buffer-string)))) (if shimbun-verbose (static-if (featurep 'xemacs) (let ((string (apply 'format fmt args))) (if shimbun-message-enable-logging (display-message 'message string) (display-message 'no-log string)) string) (if shimbun-message-enable-logging (apply 'message fmt args) (let (message-log-max) (apply 'message fmt args)))) (apply 'format fmt args))) (defun shimbun-break-long-japanese-lines (&optional shimbun) "Break long Japanese lines in an article. Article should be charset decoded html data. If SHIMBUN is given, this function will narrow the buffer to just an article using the shimbun class variables `content-start' and `content-end'. Otherwise, it considers the buffer has already been narrowed to an article." (save-restriction (when shimbun (goto-char (point-min)) (let ((case-fold-search t) start) (when (and (re-search-forward (shimbun-content-start shimbun) nil t) (setq start (point)) (re-search-forward (shimbun-content-end shimbun) nil t)) (narrow-to-region start (match-beginning 0))))) (goto-char (point-min)) (while (re-search-forward "]*>\\|

\\|\\([$B!"!#!K!W(B]+\\)\\(\\cj\\)?" nil t) (if (match-beginning 2) (replace-match "\\1\n\\2") (unless (eolp) (insert "\n"))))) (goto-char (point-min))) (defmacro shimbun-with-narrowed-article (shimbun &rest forms) "Narrow to the article in the buffer and evaluate FORMS." `(progn (shimbun-strip-cr) (goto-char (point-min)) (let ((case-fold-search t) start) (when (re-search-forward (shimbun-content-start ,shimbun) nil t) (setq start (match-end 0)) (when (re-search-forward (shimbun-content-end ,shimbun) nil t) (narrow-to-region start (match-beginning 0)) (goto-char (point-min)) ;; Remove trailing whitespace. (while (re-search-forward "[\t ]+$" nil t) (delete-region (match-beginning 0) (match-end 0))) (goto-char (point-min)) ,@forms (widen)) (goto-char (point-min)))))) (static-if (featurep 'xemacs) (defalias 'shimbun-char-category-list 'char-category-list) (defun shimbun-char-category-list (char) "Return a list of category mnemonics for CHAR." (append (category-set-mnemonics (char-category-set char)) nil))) (eval-when-compile (defsubst shimbun-japanese-hankaku-region-1 (start end quote) (save-restriction (narrow-to-region start end) (goto-char start) (when quote (while (re-search-forward "$B!c(B\\(?:[ $B!!(B]\\| \\)?" nil t) (replace-match "<")) (goto-char start) (while (re-search-forward "\\(?:[ $B!!(B]\\| \\)?$B!d(B" nil t) (replace-match ">")) (goto-char start) (while (search-forward "$B!u(B" nil t) (replace-match "&")) (goto-char start)) (while (re-search-forward "\ \\(?:[$B#F#f(B][$B#I#i(B][$B#L#l(B][$B#E#e(B]\\|[$B#F#f(B][$B#T#t(B][$B#P#p(B]\ \\|[$B#H#h(B][$B#T#t(B][$B#T#t(B][$B#P#p(B][$B#S#s(B]?\\|[$B#M#m(B][$B#A#a(B][$B#I#i(B][$B#L#l(B][$B#T#t(B][$B#O#o(B]\\)\ $B!'(B\\cj+" nil t) (japanese-hankaku-region (match-beginning 0) (match-end 0) t)) (goto-char start) (while (re-search-forward "\\([^0-9$B#0(B-$B#9(B]\\)$B!'(B\\|$B!'(B\\([^ 0-9$B!!#0(B-$B#9(B]\\)" nil t) (if (match-beginning 1) (replace-match "\\1:") (replace-match ":\\2") (backward-char 1)) (unless (looking-at " ") (insert " "))) (goto-char start) (while (search-forward "$B!((B" nil t) (replace-match ";") (unless (looking-at "[ $B!!(B]\\| ") (insert " "))) (goto-char start) ;; $B#Z!w#Z(B -> $B#Z(B@$B#Z(B ;; where $B#Z(B is a zenkaku alphanumeric, $B!w(B is a zenkaku symbol. (while (re-search-forward "\\cA[$B!%!-!.!0!2!=!>!?!@!C!G!w(B]\\cA" nil t) (backward-char 2) (insert (prog1 (cdr (assq (char-after) '((?$B!%(B . ?.) (?$B!-(B . ?') (?$B!.(B . ?`) (?$B!0(B . ?^) (?$B!2(B . ?_) (?$B!=(B . ?-) (?$B!>(B . ?-) (?$B!?(B . ?/) (?$B!@(B . ?\\) (?$B!C(B . ?|) (?$B!G(B . ?') (?$B!w(B . ?@)))) (delete-char 1)))) (goto-char start) ;; Replace Chinese hyphen with "$B!](B". (condition-case nil (let ((regexp (concat "[" (list (make-char 'chinese-gb2312 35 45) (make-char 'chinese-big5-1 34 49)) "]"))) (while (re-search-forward regexp nil t) (replace-match "$B!](B"))) (error)) (goto-char start) (while (re-search-forward "[^$B!!!"!#!$!%!2!!A!F!G!H!I!J!K!N!O!P!Q!R!S!a!l!m!o(B]+" nil t) (japanese-hankaku-region (match-beginning 0) (match-end 0) t)) (goto-char start) ;; Exclude ">$B!!(B" in order not to break paragraph start. (while (re-search-forward "\\([!-=?-~]\\)$B!!(B\\|$B!!(B\\([!-~]\\)" nil t) (if (match-beginning 1) (replace-match "\\1 ") (unless (memq (char-before (match-beginning 0)) '(nil ?\n ?>)) (replace-match " \\2")) (backward-char 1))) (goto-char start) (while (re-search-forward "\\([!-~]\\)$B!"(B[ $B!!(B]*\\([!-~]\\)" nil t) (replace-match "\\1, \\2") (backward-char 1)) (goto-char start) (while (re-search-forward "$B!$(B\\(\\cj\\)" nil t) (replace-match "$B!"(B\\1") (backward-char 1)) (goto-char start) (while (re-search-forward "\\(\\cj\\)$B!$(B" nil t) (replace-match "\\1$B!"(B")) (goto-char start) (while (re-search-forward "\\([0-9]\\)$B!$(B\\([0-9][0-9][0-9][^0-9]\\)" nil t) (replace-match "\\1,\\2") (backward-char 2)) (goto-char start) (while (re-search-forward "\ \\([0-9]\\)\\(?:\\($B!%(B\\)\\|\\($B!2(B\\)\\|\\($B!=(B\\)\\|\\($B!>(B\\)\\)\\([0-9]\\)" nil t) (replace-match (cond ((match-beginning 2) "\\1.\\6") ((match-beginning 3) "\\1_\\6") ((or (match-beginning 4) (match-beginning 5)) "\\1-\\6"))) (backward-char 1)) (when (eq w3m-output-coding-system 'utf-8) (goto-char start) (while (re-search-forward "\\([$A!.!0$B!F!H(B]\\)\\|[$A!c$B!G!I!k!l!m(B]" nil t) (if (match-beginning 1) (or (memq (char-before (match-beginning 1)) '(? ?$B!!(B)) (string-equal (buffer-substring (match-beginning 1) (max (- (match-beginning 1) 6) start)) " ") (progn (backward-char 1) (insert " ") (forward-char 1))) (unless (looking-at "?:[ $B!!(B]\\| ") (insert " "))))) ;; Do wakachi-gaki. ;; FIXME:$B!H2V$NCf(B 3$B%H%j%*!I!H%Y%9%H(B 8$B?J=P!I(B (goto-char start) (while (re-search-forward "\\(\\cj\\)\\(?:[ $B!!(B]\\| \\)\\([])>}]\ \\|&#\\(?:62\\|187\\|8217\\|8221\\|8250\\|8969\\|8971\\|9002\\);\ \\|&\\(?:gt\\|raquo\\|rsquo\\|rdquo\\|rsaquo\\|rceil\\|rfloor\\|rang\\);\\)\ \\|\\([(<[{]\\|&#\\(?:60\\|171\\|8216\\|8220\\|8249\\|8968\\|8970\\|9001\\);\ \\|&\\(?:lt\\|laquo\\|lsquo\\|ldquo\\|lsaquo\\|lceil\\|lfloor\\|lang\\);\\)\ \\(?:[ $B!!(B]\\| \\)\\(\\cj\\)" nil t) (replace-match (if (match-beginning 1) "\\1\\2" "\\3\\4")) (backward-char 1)) (goto-char start) (while (re-search-forward "\ \\(\\(?:[^0-9]\\|\\`\\)\\cj\\)\\([0-9]+\\(?:[,.][0-9]+\\)*[^0-9]\\)\ \\|\\(\\(?:[^0-9]\\|\\`[0-9]*\\)[!-/:-=?-~][0-9]+\\(?:[,.][0-9]+\\)*\\)\ \\(\\cj\\)\ \\|\\([0-9]\\)\\(\\cH[^0-9]\\)\ \\|\\(\\cj\\)\\([(A-Za-z}]\ \\|&#\\(?:62\\|187\\|8217\\|8221\\|8250\\|8969\\|8971\\|9002\\);\ \\|&\\(?:gt\\|raquo\\|rsquo\\|rdquo\\|rsaquo\\|rceil\\|rfloor\\|rang\\);\\)\ \\(\\cj\\)" nil t) (cond ((match-beginning 1) (unless (or (and (member (match-string 1) '("$BL@<#(B" "$BBg@5(B" "$B>") (replace-match (concat "\\9 " (match-string 10)))) (goto-char (match-end 9))))) (goto-char start) (let ((regexp (if (eq w3m-output-coding-system 'utf-8) "\\(\\cG\\|\\cg\\)\\(\\cj\\)\\|\\(\\cj\\)\\(\\cG\\|\\cg\\)" "\\(\\cg\\)\\(\\cj\\)\\|\\(\\cj\\)\\(\\cg\\)"))) (while (re-search-forward regexp nil t) (if (match-beginning 1) (unless (eq (char-before) ?$B!!(B) (replace-match "\\1 \\2")) (unless (eq (char-after (match-beginning 3)) ?$B!!(B) (replace-match "\\3 \\4"))) (backward-char 1))) ;; Finally strip useless space. (goto-char start) (while (re-search-forward "\\($B"((B\\) \\([0-9]\\)" nil t) (replace-match "\\1\\2")) (goto-char start) (let ((regexp (if (eq w3m-output-coding-system 'utf-8) (eval-when-compile (let ((chars "$A!2!4!6!8!:!#"#($B!"!#!$!%!&!+!,!1!3!4!5!6!7(B\ $B!A!J!K!L!M!N!O!P!Q!R!S!T!U!V!W!X!Y!Z![(B")) (concat "\\(?:[ $B!!(B]\\| \\)\\([" chars "$A!f$B!9!n(B]\\)" "\\|\\([" chars "]\\)\\(?:[ $B!!(B]\\| \\)"))) (eval-when-compile (let ((chars "$A!.!0!2!4!6!8!:!!c!d!e!l#"#($B!"!#!$!%!&!+!,!/(B\ $B!1!3!4!5!6!7!A!B!D!E!F!G!H!I!J!K!L!M!N!O!P!Q!R!S!T!U!V!W!X!Y!Z![!k!l!m!x(B")) (concat "\\(?:[ $B!!(B]\\| \\)\\([" chars "$A!f$B!9!n(B]\\)" "\\|\\([" chars "]\\)\\(?:[ $B!!(B]\\| \\)")))))) (while (re-search-forward regexp nil t) (goto-char (match-beginning 0)) (if (match-beginning 1) (if (or (bobp) (eq (save-match-data (when (re-search-backward ">[\t\n ]*" nil t) (match-end 0))) (match-beginning 0))) ;; Don't break paragraph start. (goto-char (match-beginning 1)) (delete-region (goto-char (match-beginning 0)) (match-beginning 1))) (delete-region (match-end 2) (match-end 0))))) (goto-char (point-max))))) (defun shimbun-japanese-hankaku-region (start end &optional quote) "Convert Japanese zenkaku ASCII chars between START and END into hankaku. There are exceptions; some chars and the ones in links aren't converted, and \"$B!c(B\", \"$B!d(B\" and \"$B!u(B\" are quoted if QUOTE is non-nil." (setq end (set-marker (make-marker) end)) (while start (goto-char start) (set-match-data nil) (re-search-forward "... tags are not processed. There are exceptions; some chars aren't converted, and \"$B!c(B\", \"$B!d(B\" and \"$B!u(B\" are quoted if QUOTE is non-nil." (let ((start (goto-char (point-min)))) (while (search-forward "
" nil t)
      (when (> (match-beginning 0) start)
	(shimbun-japanese-hankaku-region start (match-beginning 0) quote)
	(forward-char 5))
      (search-forward "
" nil 'move) (setq start (point))) (unless (eobp) (shimbun-japanese-hankaku-region start (point-max) quote)))) (provide 'shimbun) ;;; shimbun.el ends here