Context Navigation

source: trunk/third/evolution/e-util/ename/e-name-western.c @ 18142

Visit:

Revision 18142, 20.7 KB checked in by ghudson, 22 years ago (diff)
This commit was generated by cvs2svn to compensate for changes in r18141, which included commits to RCS files with non-trunk default branches.

Line
1	/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
2	/*
3	* A simple Western name parser.
4	*
5	* <Nat> Jamie, do you know anything about name parsing?
6	* <jwz> Are you going down that rat hole? Bring a flashlight.
7	*
8	* Authors:
9	* Nat Friedman <nat@ximian.com>
10	*
11	* Copyright 1999 - 2001, Ximian, Inc.
12	*/
13
14	#include <ctype.h>
15	#include <string.h>
16	#include <glib.h>
17
18	#include <ename/e-name-western.h>
19	#include <ename/e-name-western-tables.h>
20
/*
 * Offsets into ENameWestern.full at which each parsed component
 * begins.  e_name_western_parse() initialises every field to -1 and
 * the extraction routines treat -1 as "component not found".
 */
typedef struct {
	int prefix_idx;		/* start of the prefix ("Dr.", "Mr.", ...) */
	int first_idx;		/* start of the first name */
	int middle_idx;		/* start of the middle name */
	int nick_idx;		/* start of the quoted nickname */
	int last_idx;		/* start of the last name */
	int suffix_idx;		/* start of the suffix ("Jr.", "III", ...) */
} ENameWesternIdxs;
29
/*
 * Count the words in @str, where a word boundary is a single ' '
 * character found after the first character of the current word.
 *
 * Returns 0 when @str is NULL and 1 for an empty string.  Runs of
 * spaces are not collapsed: each space after the first character
 * starts a new "word".
 */
static int
e_name_western_str_count_words (char *str)
{
	int word_count = 0;
	const char *p = str;

	while (p != NULL) {
		word_count++;

		/*
		 * Stop at the terminator instead of stepping over it;
		 * searching from p + 1 on an empty string would read
		 * past the end of the buffer.
		 */
		if (*p == '\0')
			break;

		p = strchr (p + 1, ' ');
	}

	return word_count;
}
45
/*
 * Replace *@str with a newly allocated copy that has all leading and
 * trailing whitespace and commas removed.  The old string is freed.
 * Does nothing when *@str is NULL.
 */
static void
e_name_western_cleanup_string (char **str)
{
	char *newstr;
	char *p;
	size_t len;

	if (*str == NULL)
		return;

	/* Skip any spaces and commas at the start of the string. */
	p = *str;
	while (isspace ((unsigned char) *p) || *p == ',')
		p++;

	/* Make the copy we're going to return. */
	newstr = g_strdup (p);

	/* Trim spaces and commas off the end of the copy. */
	len = strlen (newstr);
	while (len > 0 &&
	       (isspace ((unsigned char) newstr [len - 1]) ||
		newstr [len - 1] == ','))
		len--;
	newstr [len] = '\0';

	g_free (*str);
	*str = newstr;
}
78
/*
 * Return a newly allocated copy of up to @num_words whitespace-
 * separated words of @str, starting at byte offset @idx.
 *
 * When the scan stops at the start of a following word, the single
 * whitespace character just before that word is dropped; when it
 * stops at the end of @str, everything up to the terminator
 * (including any trailing whitespace) is kept.  The caller frees the
 * result with g_free().
 */
static char *
e_name_western_get_words_at_idx (char *str, int idx, int num_words)
{
	char *words;
	char *p;
	int word_count;
	int words_len;

	/*
	 * Walk to the end of the words.
	 */
	word_count = 0;
	p = str + idx;
	while (word_count < num_words && *p != '\0') {
		while (*p != '\0' && ! isspace ((unsigned char) *p))
			p++;

		while (isspace ((unsigned char) *p))
			p++;

		word_count++;
	}

	words_len = p - str - idx - 1;

	if (*p == '\0')
		words_len++;

	/*
	 * If no word was consumed (num_words <= 0) the length above is
	 * -1; return an empty string rather than copying a bogus size.
	 */
	if (words_len < 0)
		words_len = 0;

	words = g_malloc0 (1 + words_len);
	memcpy (words, str + idx, words_len);

	return words;
}
112
/*
 * Return the larger of @a and @b.
 *
 * The original author chose a plain function over glib's MAX macro
 * (the reason is not recorded here).
 */
static int
e_name_western_max (const int a, const int b)
{
	return (a > b) ? a : b;
}
124
125	static gboolean
126	e_name_western_word_is_suffix (char *word)
127	{
128	int i;
129
130	for (i = 0; e_name_western_sfx_table [i] != NULL; i ++) {
131	int length = strlen (e_name_western_sfx_table [i]);
132	if (!g_strcasecmp (word, e_name_western_sfx_table [i]) \|\|
133	( !g_strncasecmp (word, e_name_western_sfx_table [i], length) &&
134	strlen(word) == length + 1 &&
135	word[length] == '.' ))
136	return TRUE;
137	}
138
139	return FALSE;
140	}
141
142	static char *
143	e_name_western_get_one_prefix_at_str (char *str)
144	{
145	char *word;
146	int i;
147
148	/*
149	* Check for prefixes from our table.
150	*/
151	for (i = 0; e_name_western_pfx_table [i] != NULL; i ++) {
152	int pfx_words;
153	char *words;
154
155	pfx_words = e_name_western_str_count_words (e_name_western_pfx_table [i]);
156	words = e_name_western_get_words_at_idx (str, 0, pfx_words);
157
158	if (! g_strcasecmp (words, e_name_western_pfx_table [i]))
159	return words;
160
161	g_free (words);
162	}
163
164	/*
165	* Check for prefixes we don't know about. These are always a
166	* sequence of more than one letters followed by a period.
167	*/
168	word = e_name_western_get_words_at_idx (str, 0, 1);
169
170	if (strlen (word) > 2 &&
171	isalpha ((unsigned char) word [0]) &&
172	isalpha ((unsigned char) word [1]) &&
173	word [strlen (word) - 1] == '.')
174	return word;
175
176	g_free (word);
177
178	return NULL;
179	}
180
/*
 * Return a newly allocated copy of the prefix at the start of @str,
 * or NULL if there is none.  Up to two consecutive prefixes are
 * recognised; when a second one follows the first, the result spans
 * both, including the whitespace between them.
 */
static char *
e_name_western_get_prefix_at_str (char *str)
{
	char *pfx;
	char *pfx1;
	char *pfx2;
	char *p;
	int pfx_len;

	/* Get the first prefix. */
	pfx1 = e_name_western_get_one_prefix_at_str (str);

	if (pfx1 == NULL)
		return NULL;

	/* Check for a second prefix. */
	p = str + strlen (pfx1);
	while (isspace ((unsigned char) *p))
		p++;

	pfx2 = e_name_western_get_one_prefix_at_str (p);

	/* Only one prefix: hand the caller the copy we already own. */
	if (pfx2 == NULL)
		return pfx1;

	pfx_len = (p + strlen (pfx2)) - str;
	pfx = g_malloc0 (pfx_len + 1);
	strncpy (pfx, str, pfx_len);

	g_free (pfx1);
	g_free (pfx2);

	return pfx;
}
217
218	static void
219	e_name_western_extract_prefix (ENameWestern name, ENameWesternIdxs idxs)
220	{
221	char *pfx;
222
223	pfx = e_name_western_get_prefix_at_str (name->full);
224
225	if (pfx == NULL)
226	return;
227
228	idxs->prefix_idx = 0;
229	name->prefix = pfx;
230	}
231
232	static gboolean
233	e_name_western_is_complex_last_beginning (char *word)
234	{
235	int i;
236
237	for (i = 0; e_name_western_complex_last_table [i] != NULL; i ++) {
238
239	if (! g_strcasecmp (
240	word, e_name_western_complex_last_table [i]))
241	return TRUE;
242	}
243
244	return FALSE;
245	}
246
247	static void
248	e_name_western_extract_first (ENameWestern name, ENameWesternIdxs idxs)
249	{
250	/*
251	* If there's a prefix, then the first name is right after it.
252	*/
253	if (idxs->prefix_idx != -1) {
254	int first_idx;
255	char *p;
256
257	first_idx = idxs->prefix_idx + strlen (name->prefix);
258
259	/* Skip past white space. */
260	p = name->full + first_idx;
261	while (isspace ((unsigned char)p) && p != '\0')
262	p++;
263
264	if (*p == '\0')
265	return;
266
267	idxs->first_idx = p - name->full;
268	name->first = e_name_western_get_words_at_idx (
269	name->full, idxs->first_idx, 1);
270
271	} else {
272
273	/*
274	* Otherwise, the first name is probably the first string.
275	*/
276	idxs->first_idx = 0;
277	name->first = e_name_western_get_words_at_idx (
278	name->full, idxs->first_idx, 1);
279	}
280
281	/*
282	* Check that we didn't just assign the beginning of a
283	* compound last name to the first name.
284	*/
285	if (name->first != NULL) {
286	if (e_name_western_is_complex_last_beginning (name->first)) {
287	g_free (name->first);
288	name->first = NULL;
289	idxs->first_idx = -1;
290	}
291	}
292	}
293
294	static void
295	e_name_western_extract_middle (ENameWestern name, ENameWesternIdxs idxs)
296	{
297	char *word;
298	int middle_idx;
299
300	/*
301	* Middle names can only exist if you have a first name.
302	*/
303	if (idxs->first_idx == -1)
304	return;
305
306	middle_idx = idxs->first_idx + strlen (name->first) + 1;
307
308	if (middle_idx > strlen (name->full))
309	return;
310
311	/*
312	* Search for the first space (or the terminating \0)
313	*/
314	while (isspace ((unsigned char)name->full [middle_idx]) &&
315	name->full [middle_idx] != '\0')
316	middle_idx ++;
317
318	if (name->full [middle_idx] == '\0')
319	return;
320
321	/*
322	* Skip past the nickname, if it's there.
323	*/
324	if (name->full [middle_idx] == '\"') {
325	if (idxs->nick_idx == -1)
326	return;
327
328	middle_idx = idxs->nick_idx + strlen (name->nick) + 1;
329
330	while (isspace ((unsigned char)name->full [middle_idx]) &&
331	name->full [middle_idx] != '\0')
332	middle_idx ++;
333
334	if (name->full [middle_idx] == '\0')
335	return;
336	}
337
338	/*
339	* Make sure this isn't the beginning of a complex last name.
340	*/
341	word = e_name_western_get_words_at_idx (name->full, middle_idx, 1);
342	if (e_name_western_is_complex_last_beginning (word)) {
343	g_free (word);
344	return;
345	}
346
347	/*
348	* Make sure this isn't a suffix.
349	*/
350	e_name_western_cleanup_string (& word);
351	if (e_name_western_word_is_suffix (word)) {
352	g_free (word);
353	return;
354	}
355
356	/*
357	* Make sure we didn't just grab a cute nickname.
358	*/
359	if (word [0] == '\"') {
360	g_free (word);
361	return;
362	}
363
364	idxs->middle_idx = middle_idx;
365	name->middle = word;
366	}
367
368	static void
369	e_name_western_extract_nickname (ENameWestern name, ENameWesternIdxs idxs)
370	{
371	int idx;
372	int start_idx;
373	char *str;
374
375	if (idxs->first_idx == -1)
376	return;
377
378	if (idxs->middle_idx > idxs->first_idx)
379	idx = idxs->middle_idx + strlen (name->middle);
380	else
381	idx = idxs->first_idx + strlen (name->first);
382
383	while (name->full [idx] != '\"' && name->full [idx] != '\0')
384	idx ++;
385
386	if (name->full [idx] != '\"')
387	return;
388
389	start_idx = idx;
390
391	/*
392	* Advance to the next double quote.
393	*/
394	idx ++;
395
396	while (name->full [idx] != '\"' && name->full [idx] != '\0')
397	idx ++;
398
399	if (name->full [idx] == '\0')
400	return;
401
402	str = g_malloc0 (idx - start_idx + 2);
403	strncpy (str, name->full + start_idx, idx - start_idx + 1);
404
405	name->nick = str;
406	idxs->nick_idx = start_idx;
407	}
408
409	static int
410	e_name_western_last_get_max_idx (ENameWestern name, ENameWesternIdxs idxs)
411	{
412	int max_idx = -1;
413
414	if (name->prefix != NULL)
415	max_idx = e_name_western_max (
416	max_idx, idxs->prefix_idx + strlen (name->prefix));
417
418	if (name->first != NULL)
419	max_idx = e_name_western_max (
420	max_idx, idxs->first_idx + strlen (name->first));
421
422	if (name->middle != NULL)
423	max_idx = e_name_western_max (
424	max_idx, idxs->middle_idx + strlen (name->middle));
425
426	if (name->nick != NULL)
427	max_idx = e_name_western_max (
428	max_idx, idxs->nick_idx + strlen (name->nick));
429
430	return max_idx;
431	}
432
433	static void
434	e_name_western_extract_last (ENameWestern name, ENameWesternIdxs idxs)
435	{
436	char *word;
437	int idx = -1;
438
439	idx = e_name_western_last_get_max_idx (name, idxs);
440
441	/*
442	* In the case where there is no preceding name element, the
443	* name is either just a first name ("Nat", "John"), is a
444	* single-element name ("Cher", which we treat as a first
445	* name), or is just a last name. The only time we can
446	* differentiate a last name alone from a single-element name
447	* or a first name alone is if it's a complex last name ("de
448	* Icaza", "van Josephsen"). So if there is no preceding name
449	* element, we check to see whether or not the first part of
450	* the name is the beginning of a complex name. If it is,
451	* we subsume the entire string. If we accidentally subsume
452	* the suffix, this will get fixed in the fixup routine.
453	*/
454	if (idx == -1) {
455	word = e_name_western_get_words_at_idx (name->full, 0, 1);
456	if (! e_name_western_is_complex_last_beginning (word)) {
457	g_free (word);
458	return;
459	}
460
461	name->last = g_strdup (name->full);
462	idxs->last_idx = 0;
463	return;
464	}
465
466	/* Skip past the white space. */
467	while (isspace ((unsigned char)name->full [idx]) && name->full [idx] != '\0')
468	idx ++;
469
470	if (name->full [idx] == '\0')
471	return;
472
473	word = e_name_western_get_words_at_idx (name->full, idx, 1);
474	e_name_western_cleanup_string (& word);
475	if (e_name_western_word_is_suffix (word)) {
476	g_free (word);
477	return;
478	}
479	g_free (word);
480
481	/*
482	* Subsume the rest of the string into the last name. If we
483	* accidentally include the prefix, it will get fixed later.
484	* This is the only way to handle things like "Miguel de Icaza
485	* Amozorrutia" without dropping data and forcing the user
486	* to retype it.
487	*/
488	name->last = g_strdup (name->full + idx);
489	idxs->last_idx = idx;
490	}
491
/*
 * Return a newly allocated copy of the text between the start of the
 * word that precedes offset @idx in @str and @idx itself.
 *
 * The result is never NULL; it is an empty string when @idx is not
 * positive or no word precedes it.  The caller frees it with g_free().
 */
static char *
e_name_western_get_preceding_word (char *str, int idx)
{
	int word_len;
	char *word;
	char *p;

	/*
	 * Nothing can precede the start of the string.  Answering here
	 * also avoids reading before the buffer for a negative index.
	 */
	if (idx <= 0)
		return g_malloc0 (1);

	p = str + idx;

	/* Step back over white space, then over the word itself. */
	while (isspace ((unsigned char) *p) && p > str)
		p--;

	while (! isspace ((unsigned char) *p) && p > str)
		p--;

	/* We stopped on the separator; move onto the word's first char. */
	if (isspace ((unsigned char) *p))
		p++;

	/*
	 * Clamp so the allocation below is at least one byte and the
	 * function never returns NULL.
	 */
	word_len = (str + idx) - p;
	if (word_len < 0)
		word_len = 0;

	word = g_malloc0 (word_len + 1);
	if (word_len > 0)
		strncpy (word, p, word_len);

	return word;
}
517
518	static char *
519	e_name_western_get_suffix_at_str_end (char *str)
520	{
521	char *suffix;
522	char *p;
523
524	/*
525	* Walk backwards till we reach the beginning of the
526	* (potentially-comma-separated) list of suffixes.
527	*/
528	p = str + strlen (str);
529	while (1) {
530	char *nextp;
531	char *word;
532
533	word = e_name_western_get_preceding_word (str, p - str);
534	nextp = p - strlen (word) - 1;
535
536	e_name_western_cleanup_string (& word);
537
538	if (e_name_western_word_is_suffix (word)) {
539	p = nextp;
540	g_free (word);
541	} else {
542	g_free (word);
543	break;
544	}
545	}
546
547	if (p == (str + strlen (str)))
548	return NULL;
549
550	suffix = g_strdup (p);
551	e_name_western_cleanup_string (& suffix);
552
553	if (strlen (suffix) == 0) {
554	g_free (suffix);
555	return NULL;
556	}
557
558	return suffix;
559	}
560
561	static void
562	e_name_western_extract_suffix (ENameWestern name, ENameWesternIdxs idxs)
563	{
564
565	name->suffix = e_name_western_get_suffix_at_str_end (name->full);
566
567	if (name->suffix == NULL)
568	return;
569
570	idxs->suffix_idx = strlen (name->full) - strlen (name->suffix);
571	}
572
573	static gboolean
574	e_name_western_detect_backwards (ENameWestern name, ENameWesternIdxs idxs)
575	{
576	char *comma;
577	char *word;
578
579	comma = strchr (name->full, ',');
580
581	if (comma == NULL)
582	return FALSE;
583
584	/*
585	* If there's a comma, we need to detect whether it's
586	* separating the last name from the first or just separating
587	* suffixes. So we grab the word which comes before the
588	* comma and check if it's a suffix.
589	*/
590	word = e_name_western_get_preceding_word (name->full, comma - name->full);
591
592	if (e_name_western_word_is_suffix (word)) {
593	g_free (word);
594	return FALSE;
595	}
596
597	g_free (word);
598	return TRUE;
599	}
600
601	static void
602	e_name_western_reorder_asshole (ENameWestern name, ENameWesternIdxs idxs)
603	{
604	char *prefix;
605	char *last;
606	char *suffix;
607	char *firstmidnick;
608	char *newfull;
609
610	char *comma;
611	char *p;
612
613	if (! e_name_western_detect_backwards (name, idxs))
614	return;
615
616	/*
617	* Convert
618	* <Prefix> <Last name>, <First name> <Middle[+nick] name> <Suffix>
619	* to
620	* <Prefix> <First name> <Middle[+nick] name> <Last name> <Suffix>
621	*/
622
623	/*
624	* Grab the prefix from the beginning.
625	*/
626	prefix = e_name_western_get_prefix_at_str (name->full);
627
628	/*
629	* Everything from the end of the prefix to the comma is the
630	* last name.
631	*/
632	comma = strchr (name->full, ',');
633	if (comma == NULL)
634	return;
635
636	p = name->full + (prefix == NULL ? 0 : strlen (prefix));
637
638	while (isspace ((unsigned char)p) && p != '\0')
639	p ++;
640
641	last = g_malloc0 (comma - p + 1);
642	strncpy (last, p, comma - p);
643
644	/*
645	* Get the suffix off the end.
646	*/
647	suffix = e_name_western_get_suffix_at_str_end (name->full);
648
649	/*
650	* Firstmidnick is everything from the comma to the beginning
651	* of the suffix.
652	*/
653	p = comma + 1;
654
655	while (isspace ((unsigned char)p) && p != '\0')
656	p ++;
657
658	if (suffix != NULL) {
659	char *q;
660
661	/*
662	* Point q at the beginning of the suffix.
663	*/
664	q = name->full + strlen (name->full) - strlen (suffix) - 1;
665
666	/*
667	* Walk backwards until we hit the space which
668	* separates the suffix from firstmidnick.
669	*/
670	while (! isspace ((unsigned char)*q) && q > comma)
671	q --;
672
673	if ((q - p + 1) > 0) {
674	firstmidnick = g_malloc0 (q - p + 1);
675	strncpy (firstmidnick, p, q - p);
676	} else
677	firstmidnick = NULL;
678	} else {
679	firstmidnick = g_strdup (p);
680	}
681
682	/*
683	* Create our new reordered version of the name.
684	*/
685	#define NULLSTR(a) ((a) == NULL ? "" : (a))
686	newfull = g_strdup_printf ("%s %s %s %s", NULLSTR (prefix), NULLSTR (firstmidnick),
687	NULLSTR (last), NULLSTR (suffix));
688	g_strstrip (newfull);
689	g_free (name->full);
690	name->full = newfull;
691
692
693	g_free (prefix);
694	g_free (firstmidnick);
695	g_free (last);
696	g_free (suffix);
697	}
698
/*
 * If *@str is an empty string, free it, set *@str to NULL and set
 * *@idx to -1.  NULL and non-empty strings are left alone.
 */
static void
e_name_western_zap_nil (char **str, int *idx)
{
	if (*str == NULL)
		return;

	if (strlen (*str) != 0)
		return;

	*idx = -1;
	g_free (*str);
	*str = NULL;
}
712
/*
 * Body shared by the two CHECK_MIDDLE_NAME_FOR_CONJUNCTION* macros.
 * It expects `name' (ENameWestern *) and `idxs' (ENameWesternIdxs *)
 * to be in scope and must be expanded at the start of a block,
 * because it declares a local variable.
 *
 * The middle name is treated as a conjunction joining two first
 * names: the first name becomes "<first> <middle> <first word of
 * last>".  If the last name has more than one word, the remainder
 * becomes the new last name; otherwise the last name is cleared.
 * The middle name is always cleared.
 */
#define FINISH_CHECK_MIDDLE_NAME_FOR_CONJUNCTION \
	char *last_start = NULL; \
	if (name->last) \
		last_start = strchr (name->last, ' '); \
	if (last_start) { \
		char *new_last, *new_first; \
		\
		new_last = g_strdup (last_start + 1); \
		*last_start = '\0'; \
		\
		idxs->last_idx += (last_start - name->last) + 1; \
		\
		new_first = g_strdup_printf ("%s %s %s", name->first, name->middle, name->last); \
		\
		g_free (name->first); \
		g_free (name->middle); \
		g_free (name->last); \
		\
		name->first = new_first; \
		name->middle = NULL; \
		name->last = new_last; \
		\
		idxs->middle_idx = -1; \
	} else { \
		char *new_first; \
		\
		new_first = g_strdup_printf ("%s %s %s", name->first, name->middle, name->last); \
		\
		g_free (name->first); \
		g_free (name->middle); \
		g_free (name->last); \
		\
		name->first = new_first; \
		name->middle = NULL; \
		name->last = NULL; \
		idxs->middle_idx = -1; \
		idxs->last_idx = -1; \
	}

/* Apply the conjunction fix-up when the middle name equals @conj exactly. */
#define CHECK_MIDDLE_NAME_FOR_CONJUNCTION(conj) \
	if (idxs->middle_idx != -1 && !strcmp (name->middle, conj)) { \
		FINISH_CHECK_MIDDLE_NAME_FOR_CONJUNCTION \
	}

/*
 * Same as above, but compares case-insensitively.  Uses g_strcasecmp,
 * like the rest of this file, rather than strcasecmp, whose header
 * (<strings.h>) is not included here.
 */
#define CHECK_MIDDLE_NAME_FOR_CONJUNCTION_CASE(conj) \
	if (idxs->middle_idx != -1 && !g_strcasecmp (name->middle, conj)) { \
		FINISH_CHECK_MIDDLE_NAME_FOR_CONJUNCTION \
	}
761
762	static void
763	e_name_western_fixup (ENameWestern name, ENameWesternIdxs idxs)
764	{
765	/*
766	* The middle and last names cannot be the same.
767	*/
768	if (idxs->middle_idx != -1 && idxs->middle_idx == idxs->last_idx) {
769	idxs->middle_idx = -1;
770	g_free (name->middle);
771	name->middle = NULL;
772	}
773
774	/*
775	* If we have a middle name and no last name, then we mistook
776	* the last name for the middle name.
777	*/
778	if (idxs->last_idx == -1 && idxs->middle_idx != -1) {
779	idxs->last_idx = idxs->middle_idx;
780	name->last = name->middle;
781	name->middle = NULL;
782	idxs->middle_idx = -1;
783	}
784
785	/*
786	* Check to see if we accidentally included the suffix in the
787	* last name.
788	*/
789	if (idxs->suffix_idx != -1 && idxs->last_idx != -1 &&
790	idxs->suffix_idx < (idxs->last_idx + strlen (name->last))) {
791	char *sfx;
792
793	sfx = name->last + (idxs->suffix_idx - idxs->last_idx);
794	if (sfx != NULL) {
795	char *newlast;
796	char *p;
797
798	p = sfx - 1;
799	while (isspace ((unsigned char)*p) && p > name->last)
800	p --;
801	p ++;
802
803	newlast = g_malloc0 (p - name->last + 1);
804	strncpy (newlast, name->last, p - name->last);
805	g_free (name->last);
806	name->last = newlast;
807	}
808	}
809
810	/*
811	* If we have a prefix and a first name, but no last name,
812	* then we need to assign the first name to the last name.
813	* This way we get things like "Mr Friedman" correctly.
814	*/
815	if (idxs->first_idx != -1 && idxs->prefix_idx != -1 &&
816	idxs->last_idx == -1) {
817	name->last = name->first;
818	idxs->last_idx = idxs->first_idx;
819	idxs->first_idx = -1;
820	name->first = NULL;
821	}
822
823	if (idxs->middle_idx != -1) {
824	CHECK_MIDDLE_NAME_FOR_CONJUNCTION ("&");
825	CHECK_MIDDLE_NAME_FOR_CONJUNCTION ("*");
826	CHECK_MIDDLE_NAME_FOR_CONJUNCTION ("\|");
827	CHECK_MIDDLE_NAME_FOR_CONJUNCTION ("^");
828	CHECK_MIDDLE_NAME_FOR_CONJUNCTION ("&&");
829	CHECK_MIDDLE_NAME_FOR_CONJUNCTION ("\|\|");
830	CHECK_MIDDLE_NAME_FOR_CONJUNCTION ("+");
831	CHECK_MIDDLE_NAME_FOR_CONJUNCTION ("-");
832	CHECK_MIDDLE_NAME_FOR_CONJUNCTION_CASE ("and");
833	CHECK_MIDDLE_NAME_FOR_CONJUNCTION_CASE ("or");
834	CHECK_MIDDLE_NAME_FOR_CONJUNCTION_CASE ("plus");
835
836	/* Spanish */
837	CHECK_MIDDLE_NAME_FOR_CONJUNCTION_CASE ("y");
838
839	/* German */
840	CHECK_MIDDLE_NAME_FOR_CONJUNCTION_CASE ("und");
841
842	/* Italian */
843	CHECK_MIDDLE_NAME_FOR_CONJUNCTION_CASE ("e");
844
845	/* Czech */
846	CHECK_MIDDLE_NAME_FOR_CONJUNCTION_CASE ("a");
847
848	/* Finnish */
849	CHECK_MIDDLE_NAME_FOR_CONJUNCTION_CASE ("ja");
850
851	/* French */
852	CHECK_MIDDLE_NAME_FOR_CONJUNCTION_CASE ("et");
853
854	/* Russian */
855	CHECK_MIDDLE_NAME_FOR_CONJUNCTION ("\xd0\x98"); /* u+0418 */
856	CHECK_MIDDLE_NAME_FOR_CONJUNCTION ("\xd0\xb8"); /* u+0438 */
857	}
858
859	/*
860	* Remove stray spaces and commas (although there don't seem
861	* to be any in the test cases, they might show up later).
862	*/
863	e_name_western_cleanup_string (& name->prefix);
864	e_name_western_cleanup_string (& name->first);
865	e_name_western_cleanup_string (& name->middle);
866	e_name_western_cleanup_string (& name->nick);
867	e_name_western_cleanup_string (& name->last);
868	e_name_western_cleanup_string (& name->suffix);
869
870	/*
871	* Make zero-length strings just NULL.
872	*/
873	e_name_western_zap_nil (& name->prefix, & idxs->prefix_idx);
874	e_name_western_zap_nil (& name->first, & idxs->first_idx);
875	e_name_western_zap_nil (& name->middle, & idxs->middle_idx);
876	e_name_western_zap_nil (& name->nick, & idxs->nick_idx);
877	e_name_western_zap_nil (& name->last, & idxs->last_idx);
878	e_name_western_zap_nil (& name->suffix, & idxs->suffix_idx);
879	}
880
881	/**
882	* e_name_western_western_parse_fullname:
883	* @full_name: A string containing a Western name.
884	*
885	* Parses @full_name and returns an #ENameWestern object filled with
886	* the component parts of the name.
887	*/
888	ENameWestern *
889	e_name_western_parse (const char *full_name)
890	{
891	ENameWesternIdxs *idxs;
892	ENameWestern *wname;
893
894	wname = g_new0 (ENameWestern, 1);
895
896	wname->full = g_strdup (full_name);
897
898	idxs = g_new0 (ENameWesternIdxs, 1);
899
900	idxs->prefix_idx = -1;
901	idxs->first_idx = -1;
902	idxs->middle_idx = -1;
903	idxs->nick_idx = -1;
904	idxs->last_idx = -1;
905	idxs->suffix_idx = -1;
906
907	/*
908	* An extremely simple algorithm.
909	*
910	* The goal here is to get it right 95% of the time for
911	* Western names.
912	*
913	* First we check to see if this is an ass-backwards name
914	* ("Prefix Last, First Middle Suffix"). These names really
915	* suck (imagine "Dr von Johnson, Albert Roderick Jr"), so
916	* we reorder them first and then parse them.
917	*
918	* Next, we grab the most obvious assignments for the various
919	* parts of the name. Once this is done, we check for stupid
920	* errors and fix them up.
921	*/
922	e_name_western_reorder_asshole (wname, idxs);
923
924	e_name_western_extract_prefix (wname, idxs);
925	e_name_western_extract_first (wname, idxs);
926	e_name_western_extract_nickname (wname, idxs);
927	e_name_western_extract_middle (wname, idxs);
928	e_name_western_extract_last (wname, idxs);
929	e_name_western_extract_suffix (wname, idxs);
930
931	e_name_western_fixup (wname, idxs);
932
933	g_free (idxs);
934
935	return wname;
936	}
937
938	/**
939	* e_name_western_free:
940	* @name: An ENameWestern object which needs to be freed.
941	*
942	* Deep-frees @name
943	*/
944	void
945	e_name_western_free (ENameWestern *w)
946	{
947
948	g_free (w->prefix);
949	g_free (w->first);
950	g_free (w->middle);
951	g_free (w->nick);
952	g_free (w->last);
953	g_free (w->suffix);
954
955	g_free (w->full);
956
957	g_free (w);
958	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: