/*
 * Source: trunk/third/evolution/e-util/ename/e-name-western.c @ 16787
 *
 * Revision 16787, 20.3 KB, checked in by ghudson, 23 years ago.
 * This commit was generated by cvs2svn to compensate for changes in
 * r16786, which included commits to RCS files with non-trunk default
 * branches.
 */
1/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
2/*
3 * A simple Western name parser.
4 *
5 * <Nat> Jamie, do you know anything about name parsing?
6 * <jwz> Are you going down that rat hole?  Bring a flashlight.
7 *
8 * Authors:
9 *   Nat Friedman <nat@ximian.com>
10 *
11 * Copyright 1999 - 2001, Ximian, Inc.
12 */
13
14#include <ctype.h>
15#include <string.h>
16#include <glib.h>
17 
18#include <ename/e-name-western.h>
19#include <ename/e-name-western-tables.h>
20
/*
 * Positions of the parsed name components, expressed as offsets into
 * ENameWestern.full.  A value of -1 means the component has not been
 * found (the parser initialises every field to -1).
 */
typedef struct {
        int prefix_idx;         /* start of the prefix ("Mr", "Dr.") */
        int first_idx;          /* start of the first name */
        int middle_idx;         /* start of the middle name */
        int nick_idx;           /* opening quote of the nickname */
        int last_idx;           /* start of the last name */
        int suffix_idx;         /* start of the suffix */
} ENameWesternIdxs;
29
/*
 * Count the space-separated words in @str.
 *
 * The first character always belongs to the first word; every ' '
 * found after it starts a new word (so consecutive or trailing spaces
 * each count as a word boundary).  Returns 0 when @str is NULL or
 * empty.
 */
static int
e_name_western_str_count_words (char *str)
{
        int word_count = 0;
        char *p;

        /*
         * An empty string has no first character to step over;
         * advancing past its terminator would make strchr() read out
         * of bounds.
         */
        if (str == NULL || *str == '\0')
                return 0;

        /*
         * p always points at a non-NUL character here, so p + 1 is at
         * most the terminator.
         */
        for (p = str; p != NULL; p = strchr (p + 1, ' '))
                word_count ++;

        return word_count;
}
45
/*
 * Replace *@str with a newly allocated copy that has all leading and
 * trailing whitespace and commas removed.  The old string is freed.
 * Does nothing when *@str is NULL.
 */
static void
e_name_western_cleanup_string (char **str)
{
        char   *newstr;
        char   *p;
        size_t  len;

        if (*str == NULL)
                return;

        /*
         * Skip any spaces and commas at the start of the string.  The
         * cast keeps bytes >= 0x80 (e.g. UTF-8) from being passed to
         * isspace() as negative values.
         */
        p = *str;
        while (isspace ((unsigned char) *p) || *p == ',')
                p ++;

        /* Make the copy we're going to return. */
        newstr = g_strdup (p);

        /* Trim spaces and commas off the end of the copy. */
        len = strlen (newstr);
        while (len > 0 &&
               (isspace ((unsigned char) newstr [len - 1]) ||
                newstr [len - 1] == ','))
                len --;
        newstr [len] = '\0';

        g_free (*str);
        *str = newstr;
}
78
/*
 * Return a newly allocated copy of up to @num_words whitespace-separated
 * words of @str, starting at offset @idx.
 *
 * When the walk stops at the start of a following word, the last
 * separator character is left out of the copy.  When it stops at the
 * end of the string, everything up to the terminator is copied.
 * An empty string is returned when there is nothing to copy
 * (including @num_words <= 0).  The caller frees the result.
 */
static char *
e_name_western_get_words_at_idx (char *str, int idx, int num_words)
{
        char *words;
        char *p;
        int   word_count;
        int   words_len;

        /*
         * Walk to the end of the words.
         */
        word_count = 0;
        p = str + idx;
        while (word_count < num_words && *p != '\0') {
                while (*p != '\0' && ! isspace ((unsigned char) *p))
                        p ++;

                while (isspace ((unsigned char) *p))
                        p ++;

                word_count ++;
        }

        words_len = p - str - idx;

        /* Drop the separator that precedes the next word. */
        if (*p != '\0')
                words_len --;

        /*
         * If no word was walked over, the length would be -1, which
         * must not reach the allocator or the copy.
         */
        if (words_len < 0)
                words_len = 0;

        words = g_malloc0 (1 + words_len);
        memcpy (words, str + idx, words_len);

        return words;
}
112
/*
 * Return the larger of two ints; used here in place of glib's MAX
 * macro.
 *
 * NOTE(review): callers pass "idx + strlen (...)" expressions, which
 * are unsigned; taking int parameters presumably avoids an unsigned
 * comparison against -1 -- confirm.
 */
static int
e_name_western_max (const int a, const int b)
{
        return (a > b) ? a : b;
}
124
125static gboolean
126e_name_western_word_is_suffix (char *word)
127{
128        int i;
129
130        for (i = 0; e_name_western_sfx_table [i] != NULL; i ++) {
131                if (g_strcasecmp (word, e_name_western_sfx_table [i]))
132                        continue;
133
134                return TRUE;
135        }
136
137        return FALSE;
138}
139
140static char *
141e_name_western_get_one_prefix_at_str (char *str)
142{
143        char *word;
144        int   i;
145
146        /*
147         * Check for prefixes from our table.
148         */
149        for (i = 0; e_name_western_pfx_table [i] != NULL; i ++) {
150                int pfx_words;
151                char *words;
152
153                pfx_words = e_name_western_str_count_words (e_name_western_pfx_table [i]);
154                words = e_name_western_get_words_at_idx (str, 0, pfx_words);
155
156                if (! g_strcasecmp (words, e_name_western_pfx_table [i]))
157                        return words;
158
159                g_free (words);
160        }
161
162        /*
163         * Check for prefixes we don't know about.  These are always a
164         * sequence of more than one letters followed by a period.
165         */
166        word = e_name_western_get_words_at_idx (str, 0, 1);
167
168        if (strlen (word) > 2 &&
169            isalpha ((unsigned char) word [0]) &&
170            isalpha ((unsigned char) word [1]) &&
171            word [strlen (word) - 1] == '.')
172                return word;
173
174        g_free (word);
175
176        return NULL;
177}
178
/*
 * Return a newly allocated copy of the prefix at the start of @str,
 * which may consist of one or two consecutive prefixes ("Rev. Dr."),
 * or NULL if @str does not start with a prefix.
 */
static char *
e_name_western_get_prefix_at_str (char *str)
{
        char *pfx;
        char *pfx1;
        char *pfx2;
        char *p;
        int   pfx_len;

        /* Get the first prefix. */
        pfx1 = e_name_western_get_one_prefix_at_str (str);

        if (pfx1 == NULL)
                return NULL;

        /* Check for a second prefix after the whitespace. */
        p = str + strlen (pfx1);
        while (isspace ((unsigned char) *p))
                p ++;

        pfx2 = e_name_western_get_one_prefix_at_str (p);

        /* Only one prefix: hand the string we already own to the caller. */
        if (pfx2 == NULL)
                return pfx1;

        /*
         * Two prefixes: copy everything from the start of the string
         * to the end of the second prefix, separator included.
         */
        pfx_len = (p + strlen (pfx2)) - str;
        pfx = g_malloc0 (pfx_len + 1);
        memcpy (pfx, str, pfx_len);

        g_free (pfx1);
        g_free (pfx2);

        return pfx;
}
215
216static void
217e_name_western_extract_prefix (ENameWestern *name, ENameWesternIdxs *idxs)
218{
219        char *pfx;
220
221        pfx = e_name_western_get_prefix_at_str (name->full);
222
223        if (pfx == NULL)
224                return;
225
226        idxs->prefix_idx = 0;
227        name->prefix     = pfx;
228}
229
230static gboolean
231e_name_western_is_complex_last_beginning (char *word)
232{
233        int i;
234
235        for (i = 0; e_name_western_complex_last_table [i] != NULL; i ++) {
236
237                if (! g_strcasecmp (
238                        word, e_name_western_complex_last_table [i]))
239                        return TRUE;
240        }
241
242        return FALSE;
243}
244
245static void
246e_name_western_extract_first (ENameWestern *name, ENameWesternIdxs *idxs)
247{
248        /*
249         * If there's a prefix, then the first name is right after it.
250         */
251        if (idxs->prefix_idx != -1) {
252                int   first_idx;
253                char *p;
254
255                first_idx = idxs->prefix_idx + strlen (name->prefix);
256
257                /* Skip past white space. */
258                p = name->full + first_idx;
259                while (isspace (*p) && *p != '\0')
260                        p++;
261
262                if (*p == '\0')
263                        return;
264
265                idxs->first_idx = p - name->full;
266                name->first = e_name_western_get_words_at_idx (
267                        name->full, idxs->first_idx, 1);
268
269        } else {
270
271                /*
272                 * Otherwise, the first name is probably the first string.
273                 */
274                idxs->first_idx = 0;
275                name->first = e_name_western_get_words_at_idx (
276                        name->full, idxs->first_idx, 1);
277        }
278
279        /*
280         * Check that we didn't just assign the beginning of a
281         * compound last name to the first name.
282         */
283        if (name->first != NULL) {
284                if (e_name_western_is_complex_last_beginning (name->first)) {
285                        g_free (name->first);
286                        name->first = NULL;
287                        idxs->first_idx = -1;
288                }
289        }
290}
291
292static void
293e_name_western_extract_middle (ENameWestern *name, ENameWesternIdxs *idxs)
294{
295        char *word;
296        int   middle_idx;
297
298        /*
299         * Middle names can only exist if you have a first name.
300         */
301        if (idxs->first_idx == -1)
302                return;
303
304        middle_idx = idxs->first_idx + strlen (name->first) + 1;
305
306        if (middle_idx > strlen (name->full))
307                return;
308       
309        /*
310         * Search for the first space (or the terminating \0)
311         */
312        while (isspace (name->full [middle_idx]) &&
313               name->full [middle_idx] != '\0')
314                middle_idx ++;
315               
316        if (name->full [middle_idx] == '\0')
317                return;
318
319        /*
320         * Skip past the nickname, if it's there.
321         */
322        if (name->full [middle_idx] == '\"') {
323                if (idxs->nick_idx == -1)
324                        return;
325
326                middle_idx = idxs->nick_idx + strlen (name->nick) + 1;
327               
328                while (isspace (name->full [middle_idx]) &&
329                       name->full [middle_idx] != '\0')
330                        middle_idx ++;
331
332                if (name->full [middle_idx] == '\0')
333                        return;
334        }
335
336        /*
337         * Make sure this isn't the beginning of a complex last name.
338         */
339        word = e_name_western_get_words_at_idx (name->full, middle_idx, 1);
340        if (e_name_western_is_complex_last_beginning (word)) {
341                g_free (word);
342                return;
343        }
344
345        /*
346         * Make sure this isn't a suffix.
347         */
348        e_name_western_cleanup_string (& word);
349        if (e_name_western_word_is_suffix (word)) {
350                g_free (word);
351                return;
352        }
353
354        /*
355         * Make sure we didn't just grab a cute nickname.
356         */
357        if (word [0] == '\"') {
358                g_free (word);
359                return;
360        }
361       
362        idxs->middle_idx = middle_idx;
363        name->middle = word;
364}
365
366static void
367e_name_western_extract_nickname (ENameWestern *name, ENameWesternIdxs *idxs)
368{
369        int   idx;
370        int   start_idx;
371        char *str;
372
373        if (idxs->first_idx == -1)
374                return;
375
376        if (idxs->middle_idx > idxs->first_idx)
377                idx = idxs->middle_idx + strlen (name->middle);
378        else
379                idx = idxs->first_idx + strlen (name->first);
380
381        while (name->full [idx] != '\"' && name->full [idx] != '\0')
382                idx ++;
383
384        if (name->full [idx] != '\"')
385                return;
386
387        start_idx = idx;
388
389        /*
390         * Advance to the next double quote.
391         */
392        idx ++;
393       
394        while (name->full [idx] != '\"' && name->full [idx] != '\0')
395                idx ++;
396
397        if (name->full [idx] == '\0')
398                return;
399
400        str = g_malloc0 (idx - start_idx + 2);
401        strncpy (str, name->full + start_idx, idx - start_idx + 1);
402
403        name->nick = str;
404        idxs->nick_idx = start_idx;
405}
406
407static int
408e_name_western_last_get_max_idx (ENameWestern *name, ENameWesternIdxs *idxs)
409{
410        int max_idx = -1;
411
412        if (name->prefix != NULL)
413                max_idx = e_name_western_max (
414                        max_idx, idxs->prefix_idx + strlen (name->prefix));
415
416        if (name->first != NULL)
417                max_idx = e_name_western_max (
418                        max_idx, idxs->first_idx + strlen (name->first));
419
420        if (name->middle != NULL)
421                max_idx = e_name_western_max (
422                        max_idx, idxs->middle_idx + strlen (name->middle));
423
424        if (name->nick != NULL)
425                max_idx = e_name_western_max (
426                        max_idx, idxs->nick_idx + strlen (name->nick));
427
428        return max_idx;
429}
430
431static void
432e_name_western_extract_last (ENameWestern *name, ENameWesternIdxs *idxs)
433{
434        char *word;
435        int   idx = -1;
436
437        idx = e_name_western_last_get_max_idx (name, idxs);
438
439        /*
440         * In the case where there is no preceding name element, the
441         * name is either just a first name ("Nat", "John"), is a
442         * single-element name ("Cher", which we treat as a first
443         * name), or is just a last name.  The only time we can
444         * differentiate a last name alone from a single-element name
445         * or a first name alone is if it's a complex last name ("de
446         * Icaza", "van Josephsen").  So if there is no preceding name
447         * element, we check to see whether or not the first part of
448         * the name is the beginning of a complex name.  If it is,
449         * we subsume the entire string.  If we accidentally subsume
450         * the suffix, this will get fixed in the fixup routine.
451         */
452        if (idx == -1) {
453                word = e_name_western_get_words_at_idx (name->full, 0, 1);
454                if (! e_name_western_is_complex_last_beginning (word)) {
455                        g_free (word);
456                        return;
457                }
458
459                name->last     = g_strdup (name->full);
460                idxs->last_idx = 0;
461                return;
462        }
463
464        /* Skip past the white space. */
465        while (isspace (name->full [idx]) && name->full [idx] != '\0')
466                idx ++;
467
468        if (name->full [idx] == '\0')
469                return;
470
471        word = e_name_western_get_words_at_idx (name->full, idx, 1);
472        e_name_western_cleanup_string (& word);
473        if (e_name_western_word_is_suffix (word)) {
474                g_free (word);
475                return;
476        }
477        g_free (word);
478
479        /*
480         * Subsume the rest of the string into the last name.  If we
481         * accidentally include the prefix, it will get fixed later.
482         * This is the only way to handle things like "Miguel de Icaza
483         * Amozorrutia" without dropping data and forcing the user
484         * to retype it.
485         */
486        name->last = g_strdup (name->full + idx);
487        idxs->last_idx = idx;
488}
489
/*
 * Return a newly allocated copy of the text between the start of the
 * word that precedes offset @idx in @str and @idx itself.  Whitespace
 * sitting at @idx is skipped backwards first, so the copy may end with
 * that whitespace.  The result is never NULL; it is an empty string
 * when there is no preceding word.  The caller frees the result.
 */
static char *
e_name_western_get_preceding_word (char *str, int idx)
{
        int   word_len;
        char *word;
        char *end;
        char *p;

        /* Never look in front of the string. */
        if (idx < 0)
                idx = 0;

        end = str + idx;
        p = end;

        /* Back up over white space, then over the word itself. */
        while (p > str && isspace ((unsigned char) *p))
                p --;

        while (p > str && ! isspace ((unsigned char) *p))
                p --;

        /* We stopped on the separator in front of the word. */
        if (isspace ((unsigned char) *p))
                p ++;

        /*
         * Stepping over a separator at offset 0 can move p past end;
         * treat that as an empty word instead of a negative length.
         */
        word_len = end - p;
        if (word_len < 0)
                word_len = 0;

        word = g_malloc0 (word_len + 1);
        if (word_len > 0)
                strncpy (word, p, word_len);

        return word;
}
515
516static char *
517e_name_western_get_suffix_at_str_end (char *str)
518{
519        char *suffix;
520        char *p;
521
522        /*
523         * Walk backwards till we reach the beginning of the
524         * (potentially-comma-separated) list of suffixes.
525         */
526        p = str + strlen (str);
527        while (1) {
528                char *nextp;
529                char *word;
530
531                word = e_name_western_get_preceding_word (str, p - str);
532                nextp = p - strlen (word) - 1;
533               
534                e_name_western_cleanup_string (& word);
535
536                if (e_name_western_word_is_suffix (word)) {
537                        p = nextp;
538                        g_free (word);
539                } else {
540                        g_free (word);
541                        break;
542                }
543        }
544
545        if (p == (str + strlen (str)))
546                return NULL;
547
548        suffix = g_strdup (p);
549        e_name_western_cleanup_string (& suffix);
550
551        if (strlen (suffix) == 0) {
552                g_free (suffix);
553                return NULL;
554        }
555
556        return suffix;
557}
558
559static void
560e_name_western_extract_suffix (ENameWestern *name, ENameWesternIdxs *idxs)
561{
562
563        name->suffix = e_name_western_get_suffix_at_str_end (name->full);
564
565        if (name->suffix == NULL)
566                return;
567
568        idxs->suffix_idx = strlen (name->full) - strlen (name->suffix);
569}
570
571static gboolean
572e_name_western_detect_backwards (ENameWestern *name, ENameWesternIdxs *idxs)
573{
574        char *comma;
575        char *word;
576
577        comma = strchr (name->full, ',');
578
579        if (comma == NULL)
580                return FALSE;
581
582        /*
583         * If there's a comma, we need to detect whether it's
584         * separating the last name from the first or just separating
585         * suffixes.  So we grab the word which comes before the
586         * comma and check if it's a suffix.
587         */
588        word = e_name_western_get_preceding_word (name->full, comma - name->full);
589
590        if (e_name_western_word_is_suffix (word)) {
591                g_free (word);
592                return FALSE;
593        }
594
595        g_free (word);
596        return TRUE;
597}
598
599static void
600e_name_western_reorder_asshole (ENameWestern *name, ENameWesternIdxs *idxs)
601{
602        char *prefix;
603        char *last;
604        char *suffix;
605        char *firstmidnick;
606        char *newfull;
607
608        char *comma;
609        char *p;
610
611        if (! e_name_western_detect_backwards (name, idxs))
612                return;
613
614        /*
615         * Convert
616         *    <Prefix> <Last name>, <First name> <Middle[+nick] name> <Suffix>
617         * to
618         *    <Prefix> <First name> <Middle[+nick] name> <Last name> <Suffix>
619         */
620       
621        /*
622         * Grab the prefix from the beginning.
623         */
624        prefix = e_name_western_get_prefix_at_str (name->full);
625
626        /*
627         * Everything from the end of the prefix to the comma is the
628         * last name.
629         */
630        comma = strchr (name->full, ',');
631        if (comma == NULL)
632                return;
633
634        p = name->full + (prefix == NULL ? 0 : strlen (prefix));
635
636        while (isspace (*p) && *p != '\0')
637                p ++;
638
639        last = g_malloc0 (comma - p + 1);
640        strncpy (last, p, comma - p);
641
642        /*
643         * Get the suffix off the end.
644         */
645        suffix = e_name_western_get_suffix_at_str_end (name->full);
646
647        /*
648         * Firstmidnick is everything from the comma to the beginning
649         * of the suffix.
650         */
651        p = comma + 1;
652
653        while (isspace (*p) && *p != '\0')
654                p ++;
655
656        if (suffix != NULL) {
657                char *q;
658
659                /*
660                 * Point q at the beginning of the suffix.
661                 */
662                q = name->full + strlen (name->full) - strlen (suffix) - 1;
663
664                /*
665                 * Walk backwards until we hit the space which
666                 * separates the suffix from firstmidnick.
667                 */
668                while (! isspace (*q) && q > comma)
669                        q --;
670
671                if ((q - p + 1) > 0) {
672                        firstmidnick = g_malloc0 (q - p + 1);
673                        strncpy (firstmidnick, p, q - p);
674                } else
675                        firstmidnick = NULL;
676        } else {
677                firstmidnick = g_strdup (p);
678        }
679
680        /*
681         * Create our new reordered version of the name.
682         */
683#define NULLSTR(a) ((a) == NULL ? "" : (a))
684        newfull = g_strdup_printf ("%s %s %s %s", NULLSTR (prefix), NULLSTR (firstmidnick),
685                                   NULLSTR (last), NULLSTR (suffix));
686        g_strstrip (newfull);
687        g_free (name->full);
688        name->full = newfull;
689
690
691        g_free (prefix);
692        g_free (firstmidnick);
693        g_free (last);
694        g_free (suffix);
695}
696
/*
 * If *@str is an empty string, free it, set it to NULL and reset *@idx
 * to -1.  NULL and non-empty strings are left alone.
 */
static void
e_name_western_zap_nil (char **str, int *idx)
{
        if (*str != NULL && strlen (*str) == 0) {
                *idx = -1;
                g_free (*str);
                *str = NULL;
        }
}
710
/*
 * Conjunction handling ("John and Mary Smith").  All three macros
 * expect `name' (ENameWestern *) and `idxs' (ENameWesternIdxs *) to be
 * in scope.
 *
 * FINISH_CHECK_MIDDLE_NAME_FOR_CONJUNCTION folds the first name, the
 * middle name (the conjunction) and the first word of the last name
 * into name->first.  Whatever follows the first space of the last name
 * becomes the new last name; if there is no such space, the last name
 * is cleared.  The middle name is always cleared.  It expands to a
 * compound statement, so it may be used with or without a trailing
 * semicolon.  Missing components are formatted as empty strings rather
 * than passing NULL to "%s".
 */
#define FINISH_CHECK_MIDDLE_NAME_FOR_CONJUNCTION        \
        {       \
                char *last_start = NULL;        \
                char *new_last = NULL;  \
                char *new_first;        \
        \
                if (name->last) \
                        last_start = strchr (name->last, ' ');  \
        \
                if (last_start) {       \
                        new_last = g_strdup (last_start + 1);   \
                        *last_start = '\0';     \
                        idxs->last_idx += (last_start - name->last) + 1;        \
                } else {        \
                        idxs->last_idx = -1;    \
                }       \
        \
                new_first = g_strdup_printf ("%s %s %s",        \
                                             name->first ? name->first : "",    \
                                             name->middle ? name->middle : "",  \
                                             name->last ? name->last : "");     \
        \
                g_free (name->first);   \
                g_free (name->middle);  \
                g_free (name->last);    \
        \
                name->first = new_first;        \
                name->middle = NULL;    \
                name->last = new_last;  \
        \
                idxs->middle_idx = -1;  \
        }

/*
 * Apply the conjunction fix when the middle name is exactly @conj
 * (case-sensitive).  Use as a statement, followed by a semicolon.
 */
#define CHECK_MIDDLE_NAME_FOR_CONJUNCTION(conj) \
        do {    \
                if (idxs->middle_idx != -1 && name->middle != NULL &&   \
                    !strcmp (name->middle, (conj))) {   \
                        FINISH_CHECK_MIDDLE_NAME_FOR_CONJUNCTION        \
                }       \
        } while (0)

/*
 * Same as above, but compares case-insensitively.  g_strcasecmp is
 * used, like everywhere else in this file, because strcasecmp is not
 * declared by <string.h>.
 */
#define CHECK_MIDDLE_NAME_FOR_CONJUNCTION_CASE(conj) \
        do {    \
                if (idxs->middle_idx != -1 && name->middle != NULL &&   \
                    !g_strcasecmp (name->middle, (conj))) {     \
                        FINISH_CHECK_MIDDLE_NAME_FOR_CONJUNCTION        \
                }       \
        } while (0)
759
760static void
761e_name_western_fixup (ENameWestern *name, ENameWesternIdxs *idxs)
762{
763        /*
764         * The middle and last names cannot be the same.
765         */
766        if (idxs->middle_idx != -1 && idxs->middle_idx == idxs->last_idx) {
767                idxs->middle_idx = -1;
768                g_free (name->middle);
769                name->middle = NULL;
770        }
771
772        /*
773         * If we have a middle name and no last name, then we mistook
774         * the last name for the middle name.
775         */
776        if (idxs->last_idx == -1 && idxs->middle_idx != -1) {
777                idxs->last_idx   = idxs->middle_idx;
778                name->last       = name->middle;
779                name->middle     = NULL;
780                idxs->middle_idx = -1;
781        }
782
783        /*
784         * Check to see if we accidentally included the suffix in the
785         * last name.
786         */
787        if (idxs->suffix_idx != -1 && idxs->last_idx != -1 &&
788            idxs->suffix_idx < (idxs->last_idx + strlen (name->last))) {
789                char *sfx;
790
791                sfx = name->last + (idxs->suffix_idx - idxs->last_idx);
792                if (sfx != NULL) {
793                        char *newlast;
794                        char *p;
795
796                        p = sfx - 1;
797                        while (isspace (*p) && p > name->last)
798                                p --;
799                        p ++;
800
801                        newlast = g_malloc0 (p - name->last + 1);
802                        strncpy (newlast, name->last, p - name->last);
803                        g_free (name->last);
804                        name->last = newlast;
805                }
806        }
807
808        /*
809         * If we have a prefix and a first name, but no last name,
810         * then we need to assign the first name to the last name.
811         * This way we get things like "Mr Friedman" correctly.
812         */
813        if (idxs->first_idx != -1 && idxs->prefix_idx != -1 &&
814            idxs->last_idx == -1) {
815                name->last      = name->first;
816                idxs->last_idx  = idxs->first_idx;
817                idxs->first_idx = -1;
818                name->first     = NULL;
819        }
820
821        if (idxs->middle_idx != -1) {
822                CHECK_MIDDLE_NAME_FOR_CONJUNCTION ("&");
823                CHECK_MIDDLE_NAME_FOR_CONJUNCTION ("*");
824                CHECK_MIDDLE_NAME_FOR_CONJUNCTION ("|");
825                CHECK_MIDDLE_NAME_FOR_CONJUNCTION ("^");
826                CHECK_MIDDLE_NAME_FOR_CONJUNCTION ("&&");
827                CHECK_MIDDLE_NAME_FOR_CONJUNCTION ("||");
828                CHECK_MIDDLE_NAME_FOR_CONJUNCTION ("+");
829                CHECK_MIDDLE_NAME_FOR_CONJUNCTION ("-");
830                CHECK_MIDDLE_NAME_FOR_CONJUNCTION_CASE ("and");
831                CHECK_MIDDLE_NAME_FOR_CONJUNCTION_CASE ("or");
832                CHECK_MIDDLE_NAME_FOR_CONJUNCTION_CASE ("plus");
833
834                /* Spanish */
835                CHECK_MIDDLE_NAME_FOR_CONJUNCTION_CASE ("y");
836
837                /* German */
838                CHECK_MIDDLE_NAME_FOR_CONJUNCTION_CASE ("und");
839
840                /* Italian */
841                CHECK_MIDDLE_NAME_FOR_CONJUNCTION_CASE ("e");
842
843                /* Czech */
844                CHECK_MIDDLE_NAME_FOR_CONJUNCTION_CASE ("a");
845
846                /* Finnish */
847                CHECK_MIDDLE_NAME_FOR_CONJUNCTION_CASE ("ja");
848
849                /* French */
850                CHECK_MIDDLE_NAME_FOR_CONJUNCTION_CASE ("et");
851
852                /* Russian */
853                CHECK_MIDDLE_NAME_FOR_CONJUNCTION ("\xd0\x98"); /* u+0418 */
854                CHECK_MIDDLE_NAME_FOR_CONJUNCTION ("\xd0\xb8"); /* u+0438 */
855        }
856
857        /*
858         * Remove stray spaces and commas (although there don't seem
859         * to be any in the test cases, they might show up later).
860         */
861        e_name_western_cleanup_string (& name->prefix);
862        e_name_western_cleanup_string (& name->first);
863        e_name_western_cleanup_string (& name->middle);
864        e_name_western_cleanup_string (& name->nick);
865        e_name_western_cleanup_string (& name->last);
866        e_name_western_cleanup_string (& name->suffix);
867
868        /*
869         * Make zero-length strings just NULL.
870         */
871        e_name_western_zap_nil (& name->prefix, & idxs->prefix_idx);
872        e_name_western_zap_nil (& name->first,  & idxs->first_idx);
873        e_name_western_zap_nil (& name->middle, & idxs->middle_idx);
874        e_name_western_zap_nil (& name->nick,   & idxs->nick_idx);
875        e_name_western_zap_nil (& name->last,   & idxs->last_idx);
876        e_name_western_zap_nil (& name->suffix, & idxs->suffix_idx);
877}
878
879/**
880 * e_name_western_western_parse_fullname:
881 * @full_name: A string containing a Western name.
882 *
883 * Parses @full_name and returns an #ENameWestern object filled with
884 * the component parts of the name.
885 */
886ENameWestern *
887e_name_western_parse (const char *full_name)
888{
889        ENameWesternIdxs *idxs;
890        ENameWestern *wname;
891
892        wname = g_new0 (ENameWestern, 1);
893
894        wname->full = g_strdup (full_name);
895
896        idxs = g_new0 (ENameWesternIdxs, 1);
897
898        idxs->prefix_idx = -1;
899        idxs->first_idx  = -1;
900        idxs->middle_idx = -1;
901        idxs->nick_idx   = -1;
902        idxs->last_idx   = -1;
903        idxs->suffix_idx = -1;
904       
905        /*
906         * An extremely simple algorithm.
907         *
908         * The goal here is to get it right 95% of the time for
909         * Western names.
910         *
911         * First we check to see if this is an ass-backwards name
912         * ("Prefix Last, First Middle Suffix").  These names really
913         * suck (imagine "Dr von Johnson, Albert Roderick Jr"), so
914         * we reorder them first and then parse them.
915         *
916         * Next, we grab the most obvious assignments for the various
917         * parts of the name.  Once this is done, we check for stupid
918         * errors and fix them up.
919         */
920        e_name_western_reorder_asshole  (wname, idxs);
921
922        e_name_western_extract_prefix   (wname, idxs);
923        e_name_western_extract_first    (wname, idxs);
924        e_name_western_extract_nickname (wname, idxs);
925        e_name_western_extract_middle   (wname, idxs);
926        e_name_western_extract_last     (wname, idxs);
927        e_name_western_extract_suffix   (wname, idxs);
928
929        e_name_western_fixup            (wname, idxs);
930
931        g_free (idxs);
932
933        return wname;
934}
935
936/**
937 * e_name_western_free:
938 * @name: An ENameWestern object which needs to be freed.
939 *
940 * Deep-frees @name
941 */
942void
943e_name_western_free (ENameWestern *w)
944{
945
946        g_free (w->prefix);
947        g_free (w->first);
948        g_free (w->middle);
949        g_free (w->nick);
950        g_free (w->last);
951        g_free (w->suffix);
952       
953        g_free (w->full);
954
955        g_free (w);
956}
/* Note: See TracBrowser for help on using the repository browser. */