Annotation of loncom/localize/transliterate.pm, revision 1.1
1.1 ! raeburn 1: # The LearningOnline Network with CAPA
! 2: # Transliteration to ascii
! 3: #
! 4: # $Id: transliterate.pm,v 1.1 2019/02/25 23:00:55 raeburn Exp $
! 5: #
! 6: # Copyright Michigan State University Board of Trustees
! 7: #
! 8: # This file is part of the LearningOnline Network with CAPA (LON-CAPA).
! 9: #
! 10: # LON-CAPA is free software; you can redistribute it and/or modify
! 11: # it under the terms of the GNU General Public License as published by
! 12: # the Free Software Foundation; either version 2 of the License, or
! 13: # (at your option) any later version.
! 14: #
! 15: # LON-CAPA is distributed in the hope that it will be useful,
! 16: # but WITHOUT ANY WARRANTY; without even the implied warranty of
! 17: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
! 18: # GNU General Public License for more details.
! 19: #
! 20: # You should have received a copy of the GNU General Public License
! 21: # along with LON-CAPA; if not, write to the Free Software
! 22: # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
! 23: #
! 24: # /home/httpd/html/adm/gpl.txt
! 25: #
! 26: # http://www.lon-capa.org/
! 27: #
! 28: ######################################################################
! 29: ######################################################################
! 30:
! 31: =pod
! 32:
! 33: =head1 NAME
! 34:
! 35: LONCAPA::transliterate - transliterate non-ascii characters
! 36: in filenames.
! 37:
! 38: =head1 SYNOPSIS
! 39:
! 40: When fed a filename it will replace instances of non-ascii
! 41: characters with transliterations.
! 42:
! 43: =head1 OVERVIEW
! 44:
! 45: Used to replace non-ascii character(s) with a transliteration
! 46: of the character(s) to ascii character(s).
! 47:
! 48: If there are preferred replacements for a particular language
! 49: then those should be included in a separate subroutine which
! 50: is called before the transliteration of last resort (which is
! 51: done with Text::Unidecode).
! 52:
! 53: =head1 SUBROUTINES
! 54:
! 55: =cut
! 56:
! 57: package LONCAPA::transliterate;
! 58:
! 59: use strict;
! 60: use utf8;
! 61: use Text::Unidecode qw(unidecode);
! 62: use Encode qw(decode_utf8 encode_utf8);
! 63:
! 64: =pod
! 65:
! 66: =over
! 67:
! 68: =item * fname_to_ascii()
! 69:
! 70: Inputs: $fname (required), $language (optional)
! 71:
! 72: Output: $fname
! 73:
! 74: Replaces non-ascii characters with a transliteration
! 75: of the character to an ascii character (using Text::Unidecode)
! 76:
! 77: If the language code is de, transliteration via
! 78: german_to_ascii() is used first to handle umlauts and eszett,
! 79: before using Text::Unidecode.
! 80:
! 81: If other routines are added to support preferred transliteration
! 82: of non-ascii characters for specific languages, they should be
! 83: added as new subroutines to this file, and then called if the
! 84: language code has an appropriate value.
! 85:
! 86: =back
! 87:
! 88: =cut
! 89:
sub fname_to_ascii {
    # Transliterate the non-ascii characters in a filename.
    #
    # Inputs: $fname    - filename (required). If it contains anything
    #                     outside \x{00}-\x{7f} it is passed through
    #                     decode_utf8() before being transliterated.
    #         $language - language code (optional). A value of 'de'
    #                     causes german_to_ascii() to be applied first.
    #
    # Output: the transliterated filename. $fname is returned as is when
    #         it is undefined, or when it contains only characters in
    #         the range \x{00}-\x{7f}.
    my ($fname,$language) = @_;
    # Nothing to transliterate; also avoids matching against undef.
    return $fname unless (defined($fname));
    if ($fname =~ /[^\x{00}-\x{7f}]/) {
        $fname = decode_utf8($fname);
        # $language is optional, so make sure it is defined before the
        # string comparison (avoids an "uninitialized value" warning).
        if ((defined($language)) && ($language eq 'de')) {
            $fname = german_to_ascii($fname);
        }
        # Transliteration of last resort for whatever is still non-ascii.
        $fname = unidecode($fname);
        $fname = encode_utf8($fname);
    }
    return $fname;
}
! 102:
! 103: =pod
! 104:
! 105: =over
! 106:
! 107: =item * german_to_ascii()
! 108:
! 109: Input: $fname (required)
! 110:
! 111: Output: $fname
! 112:
! 113: Replaces each letter that has an umlaut with the same letter
! 114: without the umlaut, followed by the letter e. Case is preserved.
! 115:
! 116: Replaces eszett with double s.
! 117:
! 118: =back
! 119:
! 120: =cut
! 121:
sub german_to_ascii {
    # Input:  $fname (required)
    # Output: $fname in which each umlauted vowel has been replaced by
    #         the plain vowel followed by e (upper case for upper case
    #         vowels, lower case for lower case ones), and each eszett
    #         has been replaced by ss.
    my ($fname) = @_;

    # Lookup table: German special character => ascii replacement.
    my %ascii_for = (
        'ä' => 'ae',
        'Ä' => 'AE',
        'ö' => 'oe',
        'Ö' => 'OE',
        'ü' => 'ue',
        'Ü' => 'UE',
        'ß' => 'ss',
    );

    # Every character in the class below has an entry in the table.
    $fname =~ s/([ÄäÖöÜüß])/$ascii_for{$1}/g;

    return $fname;
}
! 136:
! 137: 1;
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>