Context Navigation

source: trunk/third/perl/lib/charnames.pm @ 14545

Visit:

Revision 14545, 3.9 KB checked in by ghudson, 24 years ago (diff)
This commit was generated by cvs2svn to compensate for changes in r14544, which included commits to RCS files with non-trunk default branches.

Line
1	package charnames;
2	use bytes (); # for $bytes::hint_bits
3	$charnames::hint_bits = 0x20000;
4
5	my $txt;
6
7	# This is not optimized in any way yet
8	sub charnames {
9	$name = shift;
10	$txt = do "unicode/Name.pl" unless $txt;
11	my @off;
12	if ($^H{charnames_full} and $txt =~ /\t\t$name$/m) {
13	@off = ($-[0], $+[0]);
14	}
15	unless (@off) {
16	if ($^H{charnames_short} and $name =~ /^(.?):(.)/s) {
17	my ($script, $cname) = ($1,$2);
18	my $case = ( $cname =~ /[[:upper:]]/ ? "CAPITAL" : "SMALL");
19	if ($txt =~ m/\t\t\U$script\E (?:$case )?LETTER \U$cname$/m) {
20	@off = ($-[0], $+[0]);
21	}
22	}
23	}
24	unless (@off) {
25	my $case = ( $name =~ /[[:upper:]]/ ? "CAPITAL" : "SMALL");
26	for ( @{$^H{charnames_scripts}} ) {
27	(@off = ($-[0], $+[0])), last
28	if $txt =~ m/\t\t$_ (?:$case )?LETTER \U$name$/m;
29	}
30	}
31	die "Unknown charname '$name'" unless @off;
32
33	my $ord = hex substr $txt, $off[0] - 4, 4;
34	if ($^H & $bytes::hint_bits) { # "use bytes" in effect?
35	use bytes;
36	return chr $ord if $ord <= 255;
37	my $hex = sprintf '%X=0%o', $ord, $ord;
38	my $fname = substr $txt, $off[0] + 2, $off[1] - $off[0] - 2;
39	die "Character 0x$hex with name '$fname' is above 0xFF";
40	}
41	return chr $ord;
42	}
43
44	sub import {
45	shift;
46	die "`use charnames' needs explicit imports list" unless @_;
47	$^H \|= $charnames::hint_bits;
48	$^H{charnames} = \&charnames ;
49	my %h;
50	@h{@_} = (1) x @_;
51	$^H{charnames_full} = delete $h{':full'};
52	$^H{charnames_short} = delete $h{':short'};
53	$^H{charnames_scripts} = [map uc, keys %h];
54	}
55
56
57	1;
58	__END__
59
60	=head1 NAME
61
62	charnames - define character names for C<\N{named}> string literal escape.
63
64	=head1 SYNOPSIS
65
66	use charnames ':full';
67	print "\N{GREEK SMALL LETTER SIGMA} is called sigma.\n";
68
69	use charnames ':short';
70	print "\N{greek:Sigma} is an upper-case sigma.\n";
71
72	use charnames qw(cyrillic greek);
73	print "\N{sigma} is Greek sigma, and \N{be} is Cyrillic b.\n";
74
75	=head1 DESCRIPTION
76
77	Pragma C<use charnames> supports arguments C<:full>, C<:short> and
78	script names. If C<:full> is present, for expansion of
79	C<\N{CHARNAME}}> string C<CHARNAME> is first looked in the list of
80	standard Unicode names of chars. If C<:short> is present, and
81	C<CHARNAME> has the form C<SCRIPT:CNAME>, then C<CNAME> is looked up
82	as a letter in script C<SCRIPT>. If pragma C<use charnames> is used
83	with script name arguments, then for C<\N{CHARNAME}}> the name
84	C<CHARNAME> is looked up as a letter in the given scripts (in the
85	specified order).
86
87	For lookup of C<CHARNAME> inside a given script C<SCRIPTNAME>
88	this pragma looks for the names
89
90	SCRIPTNAME CAPITAL LETTER CHARNAME
91	SCRIPTNAME SMALL LETTER CHARNAME
92	SCRIPTNAME LETTER CHARNAME
93
94	in the table of standard Unicode names. If C<CHARNAME> is lowercase,
95	then the C<CAPITAL> variant is ignored, otherwise the C<SMALL> variant is
96	ignored.
97
98	=head1 CUSTOM TRANSLATORS
99
100	The mechanism of translation of C<\N{...}> escapes is general and not
101	hardwired into F<charnames.pm>. A module can install custom
102	translations (inside the scope which C<use>s the module) with the
103	following magic incantation:
104
105	use charnames (); # for $charnames::hint_bits
106	sub import {
107	shift;
108	$^H \|= $charnames::hint_bits;
109	$^H{charnames} = \&translator;
110	}
111
112	Here translator() is a subroutine which takes C<CHARNAME> as an
113	argument, and returns text to insert into the string instead of the
114	C<\N{CHARNAME}> escape. Since the text to insert should be different
115	in C<bytes> mode and out of it, the function should check the current
116	state of C<bytes>-flag as in:
117
118	use bytes (); # for $bytes::hint_bits
119	sub translator {
120	if ($^H & $bytes::hint_bits) {
121	return bytes_translator(@_);
122	}
123	else {
124	return utf8_translator(@_);
125	}
126	}
127
128	=head1 BUGS
129
130	Since evaluation of the translation function happens in a middle of
131	compilation (of a string literal), the translation function should not
132	do any C<eval>s or C<require>s. This restriction should be lifted in
133	a future version of Perl.
134
135	=cut

Note: See TracBrowser for help on using the repository browser.

Download in other formats: