source: trunk/third/perl/lib/charnames.pm @ 14545

Revision 14545, 3.9 KB checked in by ghudson, 24 years ago (diff)
This commit was generated by cvs2svn to compensate for changes in r14544, which included commits to RCS files with non-trunk default branches.
Line 
package charnames;
use bytes ();           # for $bytes::hint_bits

# Bit that import() ORs into $^H when this pragma is switched on.
# NOTE(review): 0x20000 is presumably the interpreter's "localize %^H"
# hint bit -- confirm against the perl sources before changing it.
$charnames::hint_bits = 0x20000;

# Text returned by unicode/Name.pl.  charnames() loads it on its first call
# and reuses it on every later call.
my $txt;

# Translate the name written inside a \N{...} escape into the character it
# denotes.  This is not optimized in any way yet.
#
# Argument: the text between the braces of \N{...}.
# Returns:  a one-character string.
# Dies:     "Unknown charname '...'" when no enabled lookup style finds the
#           name; "Character 0x... is above 0xFF" when "use bytes" is in
#           effect and the character does not fit in one byte.
#
# The lookup styles are switched on by the %^H entries that import() sets
# and are tried in this order: exact full name, SCRIPT:NAME short form,
# then a bare letter name in each listed script.
sub charnames {
  my $name = shift;     # lexical: must not clobber a package-level $name

  # The regexes below expect every table entry to look like
  # "<hex code>\t\t<NAME>\n".
  $txt = do "unicode/Name.pl" unless $txt;

  # @off receives the start and end offsets (within $txt) of the matched
  # "\t\t<NAME>" text.  Everything that comes from the caller is passed
  # through quotemeta so that it is matched literally, never as a pattern.
  my @off;
  my $exact = quotemeta $name;
  if ($^H{charnames_full} and $txt =~ /\t\t$exact$/m) {
    @off = ($-[0], $+[0]);
  }
  unless (@off) {
    if ($^H{charnames_short} and $name =~ /^(.*?):(.*)/s) {
      my ($script, $cname) = ($1,$2);
      # Any upper-case character in the letter name selects CAPITAL.
      my $case = ( $cname =~ /[[:upper:]]/ ? "CAPITAL" : "SMALL");
      my $script_re = quotemeta(uc $script);
      my $cname_re  = quotemeta(uc $cname);
      if ($txt =~ m/\t\t$script_re (?:$case )?LETTER $cname_re$/m) {
        @off = ($-[0], $+[0]);
      }
    }
  }
  unless (@off) {
    my $case = ( $name =~ /[[:upper:]]/ ? "CAPITAL" : "SMALL");
    my $name_re = quotemeta(uc $name);
    for my $script ( @{$^H{charnames_scripts}} ) {
      my $script_re = quotemeta $script;
      if ($txt =~ m/\t\t$script_re (?:$case )?LETTER $name_re$/m) {
        @off = ($-[0], $+[0]);
        last;
      }
    }
  }
  die "Unknown charname '$name'" unless @off;

  # The hex code point is whatever lies between the preceding newline (or
  # the start of the table) and the first tab.  Searching for that newline
  # instead of assuming four digits also handles longer codes.
  my $hexstart = rindex($txt, "\n", $off[0]) + 1;
  my $ord = hex substr $txt, $hexstart, $off[0] - $hexstart;
  if ($^H & $bytes::hint_bits) {        # "use bytes" in effect?
    use bytes;
    return chr $ord if $ord <= 255;
    my $hex = sprintf '%X=0%o', $ord, $ord;
    # Skip the two leading tabs to recover the name as spelled in the table.
    my $fname = substr $txt, $off[0] + 2, $off[1] - $off[0] - 2;
    die "Character 0x$hex with name '$fname' is above 0xFF";
  }
  return chr $ord;
}

# Enable \N{...} translation for the scope that says "use charnames ...".
#
# Arguments (after the class name): any mix of ':full', ':short' and script
# names.  Dies when the list is empty.
#
# Effects: sets the charnames hint bit in $^H, installs \&charnames as the
# translator in %^H, and records which lookup styles charnames() may use.
sub import {
  shift;
  die "`use charnames' needs explicit imports list" unless @_;
  $^H |= $charnames::hint_bits;
  $^H{charnames} = \&charnames ;
  my %h;
  @h{@_} = (1) x @_;
  $^H{charnames_full} = delete $h{':full'};
  $^H{charnames_short} = delete $h{':short'};
  # Walk @_ rather than keys %h so the scripts keep the order in which the
  # caller listed them.  delete() is true only for the first occurrence of a
  # name, which drops repeats as well as the ':full' / ':short' switches
  # already removed above.
  $^H{charnames_scripts} = [map uc, grep { delete $h{$_} } @_];
}


1;
__END__

=head1 NAME

charnames - define character names for C<\N{named}> string literal escape.

=head1 SYNOPSIS

  use charnames ':full';
  print "\N{GREEK SMALL LETTER SIGMA} is called sigma.\n";

  use charnames ':short';
  print "\N{greek:Sigma} is an upper-case sigma.\n";

  use charnames qw(cyrillic greek);
  print "\N{sigma} is Greek sigma, and \N{be} is Cyrillic b.\n";

=head1 DESCRIPTION

Pragma C<use charnames> supports arguments C<:full>, C<:short> and
script names.  If C<:full> is present, for expansion of
C<\N{CHARNAME}> the string C<CHARNAME> is first looked up in the list of
standard Unicode names of chars.  If C<:short> is present, and
C<CHARNAME> has the form C<SCRIPT:CNAME>, then C<CNAME> is looked up
as a letter in script C<SCRIPT>.  If pragma C<use charnames> is used
with script name arguments, then for C<\N{CHARNAME}> the name
C<CHARNAME> is looked up as a letter in the given scripts (in the
specified order).

For lookup of C<CHARNAME> inside a given script C<SCRIPTNAME>
this pragma looks for the names

  SCRIPTNAME CAPITAL LETTER CHARNAME
  SCRIPTNAME SMALL LETTER CHARNAME
  SCRIPTNAME LETTER CHARNAME

in the table of standard Unicode names.  If C<CHARNAME> contains no
upper-case characters, then the C<CAPITAL> variant is ignored; otherwise
the C<SMALL> variant is ignored.

=head1 CUSTOM TRANSLATORS

The mechanism of translation of C<\N{...}> escapes is general and not
hardwired into F<charnames.pm>.  A module can install custom
translations (inside the scope which C<use>s the module) with the
following magic incantation:

    use charnames ();           # for $charnames::hint_bits
    sub import {
        shift;
        $^H |= $charnames::hint_bits;
        $^H{charnames} = \&translator;
    }

Here translator() is a subroutine which takes C<CHARNAME> as an
argument, and returns text to insert into the string instead of the
C<\N{CHARNAME}> escape.  Since the text to insert should be different
in C<bytes> mode and out of it, the function should check the current
state of the C<bytes> flag, as in:

    use bytes ();                       # for $bytes::hint_bits
    sub translator {
        if ($^H & $bytes::hint_bits) {
            return bytes_translator(@_);
        }
        else {
            return utf8_translator(@_);
        }
    }

=head1 BUGS

Since evaluation of the translation function happens in the middle of
compilation (of a string literal), the translation function should not
do any C<eval>s or C<require>s.  This restriction should be lifted in
a future version of Perl.

=cut
Note: See TracBrowser for help on using the repository browser.