File:
[LON-CAPA] /
loncom /
localize /
transliterate.pm
Revision
1.1:
download - view:
text,
annotated -
select for diffs
Tue Feb 26 14:42:22 2019 UTC (5 years, 5 months ago) by
raeburn
Branches:
MAIN
CVS tags:
HEAD
- Bug 6792
- Replace a non-ascii character in the filename of uploaded file with an
appropriate ascii character (if available).
- If lonnet::clean_filename() reduces filename to .extension, prepend
timestamp_milliseconds.
1: # The LearningOnline Network with CAPA
2: # Transliteration to ascii
3: #
4: # $Id: transliterate.pm,v 1.1 2019/02/26 14:42:22 raeburn Exp $
5: #
6: # Copyright Michigan State University Board of Trustees
7: #
8: # This file is part of the LearningOnline Network with CAPA (LON-CAPA).
9: #
10: # LON-CAPA is free software; you can redistribute it and/or modify
11: # it under the terms of the GNU General Public License as published by
12: # the Free Software Foundation; either version 2 of the License, or
13: # (at your option) any later version.
14: #
15: # LON-CAPA is distributed in the hope that it will be useful,
16: # but WITHOUT ANY WARRANTY; without even the implied warranty of
17: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18: # GNU General Public License for more details.
19: #
20: # You should have received a copy of the GNU General Public License
21: # along with LON-CAPA; if not, write to the Free Software
22: # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23: #
24: # /home/httpd/html/adm/gpl.txt
25: #
26: # http://www.lon-capa.org/
27: #
28: ######################################################################
29: ######################################################################
30:
31: =pod
32:
33: =head1 NAME
34:
35: LONCAPA::transliterate - transliterate non-ascii characters
36: in filenames.
37:
38: =head1 SYNOPSIS
39:
40: When fed a filename it will replace instances of non-ascii
41: characters with transliterations.
42:
43: =head1 OVERVIEW
44:
45: Used to replace non-ascii character(s) with a transliteration
46: of the character(s) to ascii character(s).
47:
48: If there are preferred replacements for a particular language
49: then those should be included in a separate subroutine which
50: is called before the transliteration of last resort (which is
51: done with Text::Unidecode).
52:
53: =head1 SUBROUTINES
54:
55: =cut
56:
57: package LONCAPA::transliterate;
58:
59: use strict;
60: use utf8;
61: use Text::Unidecode qw(unidecode);
62: use Encode qw(decode_utf8 encode_utf8);
63:
64: =pod
65:
66: =over
67:
68: =item * fname_to_ascii()
69:
70: Inputs: $fname (required), $language (optional)
71:
72: Output: $fname
73:
74: Replaces non-ascii characters with a transliteration
75: of the character to an ascii character (using Text::Unidecode)
76:
77: If the language code is de, transliteration via
78: german_to_ascii() is used first to handle umlauts and eszett,
79: before using Text::Unidecode.
80:
81: If other routines are added to support preferred transliteration
82: of non-ascii characters for specific languages, they should be
83: added as new subroutines to this file, and then called if the
84: language code has an appropriate value.
85:
86: =back
87:
88: =cut
89:
# fname_to_ascii($fname, $language)
#
# Returns $fname with non-ASCII characters replaced by an ASCII
# transliteration.
#
#   $fname    - filename (required); normally a string of UTF-8 octets.
#   $language - language code (optional); when it is 'de' the German
#               replacements in german_to_ascii() are applied before
#               the generic Text::Unidecode transliteration.
#
# A filename that is undefined or already pure ASCII is returned
# unchanged.  Otherwise the result is passed through encode_utf8()
# before being returned.
sub fname_to_ascii {
    my ($fname,$language) = @_;

    # Nothing to transliterate: hand the value back untouched.
    return $fname unless (defined($fname));
    return $fname unless ($fname =~ /[^\x{00}-\x{7f}]/);

    # UTF-8 octets have to be decoded to characters first.  A string that
    # already contains a code point above 0xFF cannot be octets, and
    # Encode refuses to decode it ("Cannot decode string with wide
    # characters"), so such a string is used as it is.
    unless ($fname =~ /[^\x{00}-\x{ff}]/) {
        $fname = decode_utf8($fname);
    }

    # $language is optional, so only compare it when it was supplied.
    if ((defined($language)) && ($language eq 'de')) {
        $fname = german_to_ascii($fname);
    }

    # Transliteration of last resort for anything still non-ASCII.
    $fname = unidecode($fname);

    return encode_utf8($fname);
}
102:
103: =pod
104:
105: =over
106:
107: =item * german_to_ascii()
108:
109: Input: $fname (required)
110:
111: Output: $fname
112:
113: Replaces letters with umlauts with the equivalent letter
114: without an umlaut plus letter e. Case is preserved.
115:
116: Replaces eszett with double s.
117:
118: =back
119:
120: =cut
121:
# german_to_ascii($fname)
#
# Returns $fname with German umlauts replaced by the base letter
# followed by "e" (case preserved: upper-case umlauts become two
# upper-case letters) and eszett replaced by "ss".  All other
# characters are left alone.
#
#   $fname - string to convert (required); an undefined value is
#            returned unchanged.
#
# The characters are written as code-point escapes rather than as
# literals so that the mapping does not depend on the encoding of this
# source file or on "use utf8" being in effect.
sub german_to_ascii {
    my ($fname) = @_;
    return $fname unless (defined($fname));

    my %characters = (
        "\x{C4}" => 'AE',    # A with diaeresis
        "\x{D6}" => 'OE',    # O with diaeresis
        "\x{DC}" => 'UE',    # U with diaeresis
        "\x{E4}" => 'ae',    # a with diaeresis
        "\x{F6}" => 'oe',    # o with diaeresis
        "\x{FC}" => 'ue',    # u with diaeresis
        "\x{DF}" => 'ss',    # eszett (sharp s)
    );

    # Build the character class from the table so the two cannot drift
    # apart when a mapping is added or removed.
    my $class = join('',map { quotemeta($_) } sort(keys(%characters)));
    $fname =~ s/([$class])/$characters{$1}/g;

    return $fname;
}
136:
137: 1;
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>