require Exporter;
use C4::Context;
use C4::Output; # to get the template
+use C4::Charset;
use C4::Circulation::Circ2; # getpatroninformation
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
flagsrequired => {borrow => 1},
});
- print $query->header(-cookie => $cookie), $template->output;
+ print $query->header(
+ -type => guesstype($template->output),
+ -cookie => $cookie
+ ), $template->output;
=head1 DESCRIPTION
-expires => '+1y');
}
return ($userid, $cookie, $sessionID, $flags);
- exit;
}
# else we have a problem...
# get the inputs from the incoming query
$cookie=$query->cookie(-name => 'sessionID',
-value => $sessionID,
-expires => '+1y');
- print $query->header(-cookie=>$cookie), $template->output;
+ print $query->header(
+ -type => guesstype($template->output),
+ -cookie => $cookie
+ ), $template->output;
exit;
}
--- /dev/null
+package C4::Charset;
+
+# $Id$
+
+#package to work around problems in HTTP headers
+# Note: This is just a utility module; it should not be instantiated.
+
+
+# Copyright 2003 Katipo Communications
+#
+# This file is part of Koha.
+#
+# Koha is free software; you can redistribute it and/or modify it under the
+# terms of the GNU General Public License as published by the Free Software
+# Foundation; either version 2 of the License, or (at your option) any later
+# version.
+#
+# Koha is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
+# A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
+# Suite 330, Boston, MA 02111-1307 USA
+
+use strict;
+require Exporter;
+
+use vars qw($VERSION @ISA @EXPORT);
+
+# set the version for version checking
+$VERSION = 0.01;
+
+=head1 NAME
+
+C4::Charset - Functions for handling charsets in HTML pages
+
+=head1 SYNOPSIS
+
+ use C4::Charset;
+
+ print $query->header(-type => C4::Charset::gettype($output)), $output;
+
+=head1 DESCRIPTION
+
+The functions in this module peek into a piece of HTML and return strings
+related to the (guessed) charset.
+
+=head1 FUNCTIONS
+
+=over 2
+
+=cut
+
+@ISA = qw(Exporter);
+@EXPORT = qw(
+ &guesscharset
+ &guesstype
+ );
+
+=pod
+
+ &guesscharset($output)
+
+"Guesses" the charset from the some HTML that would be output.
+
+C<$output> is the HTML page to be output. If it contains a META tag
+with a Content-Type, the tag will be scanned for a language code.
+This code is returned if it is found; undef is returned otherwise.
+
+This function only does sloppy guessing; it will be confused by
+unexpected things like SGML comments. What it basically does is to
+grab something that looks like a META tag and scan it.
+
+=cut
+
+sub guesscharset ($) {
+ my($html) = @_;
+ my $charset = undef;
+ local($`, $&, $', $1, $2, $3);
+ # FIXME... These regular expressions will miss a lot of valid tags!
+ if ($html =~ /<meta\s+http-equiv=(["']?)Content-Type\1\s+content=(["'])text\/html\s*;\s*charset=([^\2\s\r\n]+)\2\s*(?:\/?)>/is) {
+ $charset = $3;
+ } elsif ($html =~ /<meta\s+content=(["'])text\/html\s*;\s*charset=([^\1\s\r\n]+)\1\s+http-equiv=(["']?)Content-Type\3\s*(?:\/?)>/is) {
+ $charset = $2;
+ }
+ return $charset;
+} # guess
+
+sub guesstype ($) {
+ my($html) = @_;
+ my $charset = guesscharset($html);
+ return defined $charset? "text/html; charset=$charset": "text/html";
+}
+
+#---------------------------------
+
+END { } # module clean-up code here (global destructor)
+
+1;
+__END__
+
+=back
+
+=head1 AUTHOR
+
+Koha Developement team <info@koha.org>
+
+=cut
--- /dev/null
+use strict;
+use C4::Charset;
+
+use vars qw( @tests );
+use vars qw( $loaded );
+
+BEGIN {
+ @tests = (
+ [
+ 'Normal HTML without meta tag',
+ sub { C4::Charset::guesscharset($_[0]) },
+ undef,
+ <<EOF
+<title>control case</title>
+EOF
+ ], [
+ 'Result of guesscharset with normal HTML with irrelevant meta tag',
+ sub { C4::Charset::guesscharset($_[0]) },
+ undef,
+ <<EOF
+<meta http-equiv="Content-Language" content="zh-TW">
+EOF
+ ], [
+ 'Result of guesscharset with normal HTML with irrelevant meta tag',
+ sub { C4::Charset::guesstype($_[0]) },
+ undef,
+ <<EOF
+<meta http-equiv="Content-Language" content="zh-TW">
+EOF
+ ], [
+ 'Result of guesscharset with normal HTML with relevant meta tag',
+ sub { C4::Charset::guesscharset($_[0]) },
+ 'big5',
+ <<EOF
+<meta http-equiv="Content-Type" content="text/html; charset=big5">
+EOF
+ ], [
+ 'Result of guesstype with normal HTML with relevant meta tag',
+ sub { C4::Charset::guesstype($_[0]) },
+ 'text/html; charset=big5',
+ <<EOF
+<meta http-equiv="Content-Type" content="text/html; charset=big5">
+EOF
+ ], [
+ 'Variant 1 using single quotes',
+ sub { C4::Charset::guesstype($_[0]) },
+ 'text/html; charset=iso-2022-jp',
+ <<EOF
+<meta http-equiv="Content-Type" content='text/html; charset=iso-2022-jp'>
+EOF
+ ], [
+ 'Variant 2 using single quotes',
+ sub { C4::Charset::guesstype($_[0]) },
+ 'text/html; charset=utf-8',
+ <<EOF
+<meta http-equiv='Content-Type' content="text/html; charset=utf-8">
+EOF
+ ], [
+ 'Unquoted Content-Type',
+ sub { C4::Charset::guesstype($_[0]) },
+ 'text/html; charset=big5',
+ <<EOF
+<meta http-equiv=Content-Type content="text/html; charset=big5">
+EOF
+ ], [
+ 'XML syntax',
+ sub { C4::Charset::guesstype($_[0]) },
+ 'text/html; charset=iso-8859-2',
+ <<EOF
+<meta http-equiv=Content-Type content="text/html; charset=iso-8859-2" />
+EOF
+ ], [
+ 'Expected attributes in reverse order',
+ sub { C4::Charset::guesstype($_[0]) },
+ 'text/html; charset=big5',
+ <<EOF
+<meta content="text/html; charset=big5" http-equiv="Content-Type">
+EOF
+ ], [
+ 'Extra whitespace at end',
+ sub { C4::Charset::guesstype($_[0]) },
+ 'text/html; charset=big5',
+ <<EOF
+<meta http-equiv="Content-Type" content="text/html; charset=big5" >
+EOF
+ ], [
+ 'Multiple lines',
+ sub { C4::Charset::guesstype($_[0]) },
+ 'text/html; charset=big5',
+ <<EOF
+<meta
+http-equiv="Content-Type"
+content="text/html; charset=big5"
+>
+EOF
+ ], [
+ 'With surrounding HTML',
+ sub { C4::Charset::guesstype($_[0]) },
+ 'text/html; charset=us-ascii',
+ <<EOF
+<html>
+<head>
+<title>Test case with surrounding HTML</title>
+<meta http-equiv="Content-Type" content="text/html; charset=us-ascii">
+</head>
+<body>
+The return value should not be contaiminated with any surround HTML
+FIXME: Auth.pm returns in code that can contaminate the charset
+FIXME: if we do not explicitly disallow whitespace in the charset
+</body>
+</html>
+EOF
+ ],
+);
+}
+
+BEGIN { $| = 1; printf "1..%d\n", scalar(@tests); }
+END {print "not ok 1\n" unless $loaded;}
+$loaded = 1;
+
+
+# Run all tests in sequence
+for (my $i = 1; $i <= scalar @tests; $i += 1) {
+ my $test = $tests[$i - 1];
+ my($title, $f, $expected, $input) = @$test;
+ die "not ok $i (malformed test case)\n"
+ unless @$test == 4 && ref $f eq 'CODE';
+
+ my $output = &$f($input);
+ if (
+ (!defined $output && !defined $expected)
+ || (defined $output && defined $expected && $output eq $expected)
+ ) {
+ print "ok $i ($title)\n";
+ } else {
+ print "not ok $i ($title: got ",
+ (defined $output? "\"$output\"": 'undef'),
+ ', expected ',
+ (defined $expected? "\"$expected\"": 'undef'),
+ ")\n";
+ }
+}
+
+
+
+
+