source: trunk/third/evolution/e-util/ename/e-name-western.c @ 18142

Revision 18142, 20.7 KB checked in by ghudson, 22 years ago (diff)
This commit was generated by cvs2svn to compensate for changes in r18141, which included commits to RCS files with non-trunk default branches.
Line 
1/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
2/*
3 * A simple Western name parser.
4 *
5 * <Nat> Jamie, do you know anything about name parsing?
6 * <jwz> Are you going down that rat hole?  Bring a flashlight.
7 *
8 * Authors:
9 *   Nat Friedman <nat@ximian.com>
10 *
11 * Copyright 1999 - 2001, Ximian, Inc.
12 */
13
14#include <ctype.h>
15#include <string.h>
16#include <glib.h>
17 
18#include <ename/e-name-western.h>
19#include <ename/e-name-western-tables.h>
20
/*
 * Offsets into ENameWestern.full at which each parsed component
 * starts.  The parser initialises every field to -1, which means
 * "component not found".
 */
typedef struct {
        int prefix_idx,         /* start of the prefix */
            first_idx,          /* start of the first name */
            middle_idx,         /* start of the middle name */
            nick_idx,           /* start of the quoted nickname */
            last_idx,           /* start of the last name */
            suffix_idx;         /* start of the suffix */
} ENameWesternIdxs;
29
/*
 * Count the words in @str, where words are separated by single ' '
 * characters.
 *
 * For a non-empty string the result is one plus the number of spaces
 * found after the first character, so consecutive spaces each count
 * as a separator and a leading space does not.  NULL and the empty
 * string both yield 0; the empty string is checked explicitly so that
 * we never search past the terminating NUL.
 */
static int
e_name_western_str_count_words (char *str)
{
        int word_count;
        char *p;

        if (str == NULL || *str == '\0')
                return 0;

        word_count = 1;

        /* str + 1 is at worst the terminator, so this is always in bounds. */
        for (p = strchr (str + 1, ' '); p != NULL; p = strchr (p + 1, ' '))
                word_count ++;

        return word_count;
}
45
/*
 * Replace *str with a freshly allocated copy that has all leading and
 * trailing whitespace and commas removed.  The old string is freed.
 * A NULL *str is left untouched.
 */
static void
e_name_western_cleanup_string (char **str)
{
        char   *start;
        char   *trimmed;
        size_t  len;

        if (*str == NULL)
                return;

        /* Step over the leading separators. */
        start = *str;
        while (isspace ((unsigned char) *start) || *start == ',')
                start ++;

        trimmed = g_strdup (start);

        /*
         * Chop the trailing separators.  The copy begins with a
         * non-separator character (or is empty), so this cannot eat
         * the whole of a non-empty string.
         */
        len = strlen (trimmed);
        while (len > 0 &&
               (isspace ((unsigned char) trimmed [len - 1]) ||
                trimmed [len - 1] == ','))
                len --;
        trimmed [len] = '\0';

        g_free (*str);
        *str = trimmed;
}
78
/*
 * Return a newly allocated copy of up to @num_words whitespace
 * separated words of @str, starting at offset @idx.
 *
 * When the walk stops in front of a following word, the single
 * character just before that word (a separator) is dropped; when it
 * stops at the end of the string everything up to the terminator is
 * kept.  If nothing was walked over at all (e.g. @num_words <= 0) an
 * empty string is returned instead of computing a negative length.
 *
 * The caller owns the result and must g_free() it.
 */
static char *
e_name_western_get_words_at_idx (char *str, int idx, int num_words)
{
        char *words;
        char *p;
        int   word_count;
        int   words_len;

        /*
         * Walk to the end of the words.
         */
        word_count = 0;
        p = str + idx;
        while (word_count < num_words && *p != '\0') {
                /* The word itself ... */
                while (*p != '\0' && ! isspace ((unsigned char) *p))
                        p ++;

                /* ... and the whitespace that follows it. */
                while (isspace ((unsigned char) *p))
                        p ++;

                word_count ++;
        }

        words_len = p - str - idx - 1;

        if (*p == '\0')
                words_len ++;

        /*
         * p never moved and is not at the terminator: there is
         * nothing to copy, and -1 must not reach the allocator or
         * the copy below.
         */
        if (words_len < 0)
                words_len = 0;

        words = g_malloc0 (1 + words_len);
        memcpy (words, str + idx, words_len);

        return words;
}
112
/*
 * Return the larger of two ints.  A plain function is used here in
 * place of glib's MAX macro.
 */
static int
e_name_western_max (const int a, const int b)
{
        return (a > b) ? a : b;
}
124
125static gboolean
126e_name_western_word_is_suffix (char *word)
127{
128        int i;
129
130        for (i = 0; e_name_western_sfx_table [i] != NULL; i ++) {
131                int length = strlen (e_name_western_sfx_table [i]);
132                if (!g_strcasecmp (word, e_name_western_sfx_table [i]) ||
133                    ( !g_strncasecmp (word, e_name_western_sfx_table [i], length) &&
134                      strlen(word) == length + 1 &&
135                      word[length] == '.' ))
136                        return TRUE;
137        }
138
139        return FALSE;
140}
141
142static char *
143e_name_western_get_one_prefix_at_str (char *str)
144{
145        char *word;
146        int   i;
147
148        /*
149         * Check for prefixes from our table.
150         */
151        for (i = 0; e_name_western_pfx_table [i] != NULL; i ++) {
152                int pfx_words;
153                char *words;
154
155                pfx_words = e_name_western_str_count_words (e_name_western_pfx_table [i]);
156                words = e_name_western_get_words_at_idx (str, 0, pfx_words);
157
158                if (! g_strcasecmp (words, e_name_western_pfx_table [i]))
159                        return words;
160
161                g_free (words);
162        }
163
164        /*
165         * Check for prefixes we don't know about.  These are always a
166         * sequence of more than one letters followed by a period.
167         */
168        word = e_name_western_get_words_at_idx (str, 0, 1);
169
170        if (strlen (word) > 2 &&
171            isalpha ((unsigned char) word [0]) &&
172            isalpha ((unsigned char) word [1]) &&
173            word [strlen (word) - 1] == '.')
174                return word;
175
176        g_free (word);
177
178        return NULL;
179}
180
/*
 * Return a newly allocated copy of the prefix at the start of @str,
 * which may consist of one or two consecutive prefixes.  Returns NULL
 * when @str does not start with a prefix.  The caller frees the
 * result.
 */
static char *
e_name_western_get_prefix_at_str (char *str)
{
        char *first;
        char *second;
        char *rest;
        char *result;

        first = e_name_western_get_one_prefix_at_str (str);
        if (first == NULL)
                return NULL;

        /* A second prefix may follow after some white space. */
        rest = str + strlen (first);
        while (isspace ((unsigned char) *rest))
                rest ++;

        second = e_name_western_get_one_prefix_at_str (rest);

        if (second == NULL) {
                result = g_strdup (first);
        } else {
                /* Copy everything from str up to the end of the second prefix. */
                int span = (rest + strlen (second)) - str;

                result = g_malloc0 (span + 1);
                strncpy (result, str, span);
        }

        g_free (first);
        g_free (second);

        return result;
}
217
218static void
219e_name_western_extract_prefix (ENameWestern *name, ENameWesternIdxs *idxs)
220{
221        char *pfx;
222
223        pfx = e_name_western_get_prefix_at_str (name->full);
224
225        if (pfx == NULL)
226                return;
227
228        idxs->prefix_idx = 0;
229        name->prefix     = pfx;
230}
231
232static gboolean
233e_name_western_is_complex_last_beginning (char *word)
234{
235        int i;
236
237        for (i = 0; e_name_western_complex_last_table [i] != NULL; i ++) {
238
239                if (! g_strcasecmp (
240                        word, e_name_western_complex_last_table [i]))
241                        return TRUE;
242        }
243
244        return FALSE;
245}
246
247static void
248e_name_western_extract_first (ENameWestern *name, ENameWesternIdxs *idxs)
249{
250        /*
251         * If there's a prefix, then the first name is right after it.
252         */
253        if (idxs->prefix_idx != -1) {
254                int   first_idx;
255                char *p;
256
257                first_idx = idxs->prefix_idx + strlen (name->prefix);
258
259                /* Skip past white space. */
260                p = name->full + first_idx;
261                while (isspace ((unsigned char)*p) && *p != '\0')
262                        p++;
263
264                if (*p == '\0')
265                        return;
266
267                idxs->first_idx = p - name->full;
268                name->first = e_name_western_get_words_at_idx (
269                        name->full, idxs->first_idx, 1);
270
271        } else {
272
273                /*
274                 * Otherwise, the first name is probably the first string.
275                 */
276                idxs->first_idx = 0;
277                name->first = e_name_western_get_words_at_idx (
278                        name->full, idxs->first_idx, 1);
279        }
280
281        /*
282         * Check that we didn't just assign the beginning of a
283         * compound last name to the first name.
284         */
285        if (name->first != NULL) {
286                if (e_name_western_is_complex_last_beginning (name->first)) {
287                        g_free (name->first);
288                        name->first = NULL;
289                        idxs->first_idx = -1;
290                }
291        }
292}
293
294static void
295e_name_western_extract_middle (ENameWestern *name, ENameWesternIdxs *idxs)
296{
297        char *word;
298        int   middle_idx;
299
300        /*
301         * Middle names can only exist if you have a first name.
302         */
303        if (idxs->first_idx == -1)
304                return;
305
306        middle_idx = idxs->first_idx + strlen (name->first) + 1;
307
308        if (middle_idx > strlen (name->full))
309                return;
310       
311        /*
312         * Search for the first space (or the terminating \0)
313         */
314        while (isspace ((unsigned char)name->full [middle_idx]) &&
315               name->full [middle_idx] != '\0')
316                middle_idx ++;
317               
318        if (name->full [middle_idx] == '\0')
319                return;
320
321        /*
322         * Skip past the nickname, if it's there.
323         */
324        if (name->full [middle_idx] == '\"') {
325                if (idxs->nick_idx == -1)
326                        return;
327
328                middle_idx = idxs->nick_idx + strlen (name->nick) + 1;
329               
330                while (isspace ((unsigned char)name->full [middle_idx]) &&
331                       name->full [middle_idx] != '\0')
332                        middle_idx ++;
333
334                if (name->full [middle_idx] == '\0')
335                        return;
336        }
337
338        /*
339         * Make sure this isn't the beginning of a complex last name.
340         */
341        word = e_name_western_get_words_at_idx (name->full, middle_idx, 1);
342        if (e_name_western_is_complex_last_beginning (word)) {
343                g_free (word);
344                return;
345        }
346
347        /*
348         * Make sure this isn't a suffix.
349         */
350        e_name_western_cleanup_string (& word);
351        if (e_name_western_word_is_suffix (word)) {
352                g_free (word);
353                return;
354        }
355
356        /*
357         * Make sure we didn't just grab a cute nickname.
358         */
359        if (word [0] == '\"') {
360                g_free (word);
361                return;
362        }
363       
364        idxs->middle_idx = middle_idx;
365        name->middle = word;
366}
367
368static void
369e_name_western_extract_nickname (ENameWestern *name, ENameWesternIdxs *idxs)
370{
371        int   idx;
372        int   start_idx;
373        char *str;
374
375        if (idxs->first_idx == -1)
376                return;
377
378        if (idxs->middle_idx > idxs->first_idx)
379                idx = idxs->middle_idx + strlen (name->middle);
380        else
381                idx = idxs->first_idx + strlen (name->first);
382
383        while (name->full [idx] != '\"' && name->full [idx] != '\0')
384                idx ++;
385
386        if (name->full [idx] != '\"')
387                return;
388
389        start_idx = idx;
390
391        /*
392         * Advance to the next double quote.
393         */
394        idx ++;
395       
396        while (name->full [idx] != '\"' && name->full [idx] != '\0')
397                idx ++;
398
399        if (name->full [idx] == '\0')
400                return;
401
402        str = g_malloc0 (idx - start_idx + 2);
403        strncpy (str, name->full + start_idx, idx - start_idx + 1);
404
405        name->nick = str;
406        idxs->nick_idx = start_idx;
407}
408
409static int
410e_name_western_last_get_max_idx (ENameWestern *name, ENameWesternIdxs *idxs)
411{
412        int max_idx = -1;
413
414        if (name->prefix != NULL)
415                max_idx = e_name_western_max (
416                        max_idx, idxs->prefix_idx + strlen (name->prefix));
417
418        if (name->first != NULL)
419                max_idx = e_name_western_max (
420                        max_idx, idxs->first_idx + strlen (name->first));
421
422        if (name->middle != NULL)
423                max_idx = e_name_western_max (
424                        max_idx, idxs->middle_idx + strlen (name->middle));
425
426        if (name->nick != NULL)
427                max_idx = e_name_western_max (
428                        max_idx, idxs->nick_idx + strlen (name->nick));
429
430        return max_idx;
431}
432
433static void
434e_name_western_extract_last (ENameWestern *name, ENameWesternIdxs *idxs)
435{
436        char *word;
437        int   idx = -1;
438
439        idx = e_name_western_last_get_max_idx (name, idxs);
440
441        /*
442         * In the case where there is no preceding name element, the
443         * name is either just a first name ("Nat", "John"), is a
444         * single-element name ("Cher", which we treat as a first
445         * name), or is just a last name.  The only time we can
446         * differentiate a last name alone from a single-element name
447         * or a first name alone is if it's a complex last name ("de
448         * Icaza", "van Josephsen").  So if there is no preceding name
449         * element, we check to see whether or not the first part of
450         * the name is the beginning of a complex name.  If it is,
451         * we subsume the entire string.  If we accidentally subsume
452         * the suffix, this will get fixed in the fixup routine.
453         */
454        if (idx == -1) {
455                word = e_name_western_get_words_at_idx (name->full, 0, 1);
456                if (! e_name_western_is_complex_last_beginning (word)) {
457                        g_free (word);
458                        return;
459                }
460
461                name->last     = g_strdup (name->full);
462                idxs->last_idx = 0;
463                return;
464        }
465
466        /* Skip past the white space. */
467        while (isspace ((unsigned char)name->full [idx]) && name->full [idx] != '\0')
468                idx ++;
469
470        if (name->full [idx] == '\0')
471                return;
472
473        word = e_name_western_get_words_at_idx (name->full, idx, 1);
474        e_name_western_cleanup_string (& word);
475        if (e_name_western_word_is_suffix (word)) {
476                g_free (word);
477                return;
478        }
479        g_free (word);
480
481        /*
482         * Subsume the rest of the string into the last name.  If we
483         * accidentally include the prefix, it will get fixed later.
484         * This is the only way to handle things like "Miguel de Icaza
485         * Amozorrutia" without dropping data and forcing the user
486         * to retype it.
487         */
488        name->last = g_strdup (name->full + idx);
489        idxs->last_idx = idx;
490}
491
/*
 * Return a newly allocated copy of the word that ends at offset @idx
 * of @str: the text from the start of the word before @idx up to,
 * but not including, str[idx].
 *
 * The result is never NULL; when there is no preceding word an empty
 * string is returned.  The caller frees the result.
 */
static char *
e_name_western_get_preceding_word (char *str, int idx)
{
        int   word_len;
        char *word;
        char *p;

        p = str + idx;

        /* Back over white space at (and before) idx ... */
        while (isspace ((unsigned char)*p) && p > str)
                p --;

        /* ... then back over the word itself. */
        while (! isspace ((unsigned char)*p) && p > str)
                p --;

        /* We stopped on the separator in front of the word; step onto the word. */
        if (isspace ((unsigned char)*p))
            p ++;

        /*
         * With idx == 0 and white space at str[0], p has moved past
         * str + idx.  Clamp the length so that we hand back "" rather
         * than allocating zero bytes (which yields NULL).
         */
        word_len = (str + idx) - p;
        if (word_len < 0)
                word_len = 0;

        word = g_malloc0 (word_len + 1);
        if (word_len > 0)
                strncpy (word, p, word_len);

        return word;
}
517
518static char *
519e_name_western_get_suffix_at_str_end (char *str)
520{
521        char *suffix;
522        char *p;
523
524        /*
525         * Walk backwards till we reach the beginning of the
526         * (potentially-comma-separated) list of suffixes.
527         */
528        p = str + strlen (str);
529        while (1) {
530                char *nextp;
531                char *word;
532
533                word = e_name_western_get_preceding_word (str, p - str);
534                nextp = p - strlen (word) - 1;
535               
536                e_name_western_cleanup_string (& word);
537
538                if (e_name_western_word_is_suffix (word)) {
539                        p = nextp;
540                        g_free (word);
541                } else {
542                        g_free (word);
543                        break;
544                }
545        }
546
547        if (p == (str + strlen (str)))
548                return NULL;
549
550        suffix = g_strdup (p);
551        e_name_western_cleanup_string (& suffix);
552
553        if (strlen (suffix) == 0) {
554                g_free (suffix);
555                return NULL;
556        }
557
558        return suffix;
559}
560
561static void
562e_name_western_extract_suffix (ENameWestern *name, ENameWesternIdxs *idxs)
563{
564
565        name->suffix = e_name_western_get_suffix_at_str_end (name->full);
566
567        if (name->suffix == NULL)
568                return;
569
570        idxs->suffix_idx = strlen (name->full) - strlen (name->suffix);
571}
572
573static gboolean
574e_name_western_detect_backwards (ENameWestern *name, ENameWesternIdxs *idxs)
575{
576        char *comma;
577        char *word;
578
579        comma = strchr (name->full, ',');
580
581        if (comma == NULL)
582                return FALSE;
583
584        /*
585         * If there's a comma, we need to detect whether it's
586         * separating the last name from the first or just separating
587         * suffixes.  So we grab the word which comes before the
588         * comma and check if it's a suffix.
589         */
590        word = e_name_western_get_preceding_word (name->full, comma - name->full);
591
592        if (e_name_western_word_is_suffix (word)) {
593                g_free (word);
594                return FALSE;
595        }
596
597        g_free (word);
598        return TRUE;
599}
600
601static void
602e_name_western_reorder_asshole (ENameWestern *name, ENameWesternIdxs *idxs)
603{
604        char *prefix;
605        char *last;
606        char *suffix;
607        char *firstmidnick;
608        char *newfull;
609
610        char *comma;
611        char *p;
612
613        if (! e_name_western_detect_backwards (name, idxs))
614                return;
615
616        /*
617         * Convert
618         *    <Prefix> <Last name>, <First name> <Middle[+nick] name> <Suffix>
619         * to
620         *    <Prefix> <First name> <Middle[+nick] name> <Last name> <Suffix>
621         */
622       
623        /*
624         * Grab the prefix from the beginning.
625         */
626        prefix = e_name_western_get_prefix_at_str (name->full);
627
628        /*
629         * Everything from the end of the prefix to the comma is the
630         * last name.
631         */
632        comma = strchr (name->full, ',');
633        if (comma == NULL)
634                return;
635
636        p = name->full + (prefix == NULL ? 0 : strlen (prefix));
637
638        while (isspace ((unsigned char)*p) && *p != '\0')
639                p ++;
640
641        last = g_malloc0 (comma - p + 1);
642        strncpy (last, p, comma - p);
643
644        /*
645         * Get the suffix off the end.
646         */
647        suffix = e_name_western_get_suffix_at_str_end (name->full);
648
649        /*
650         * Firstmidnick is everything from the comma to the beginning
651         * of the suffix.
652         */
653        p = comma + 1;
654
655        while (isspace ((unsigned char)*p) && *p != '\0')
656                p ++;
657
658        if (suffix != NULL) {
659                char *q;
660
661                /*
662                 * Point q at the beginning of the suffix.
663                 */
664                q = name->full + strlen (name->full) - strlen (suffix) - 1;
665
666                /*
667                 * Walk backwards until we hit the space which
668                 * separates the suffix from firstmidnick.
669                 */
670                while (! isspace ((unsigned char)*q) && q > comma)
671                        q --;
672
673                if ((q - p + 1) > 0) {
674                        firstmidnick = g_malloc0 (q - p + 1);
675                        strncpy (firstmidnick, p, q - p);
676                } else
677                        firstmidnick = NULL;
678        } else {
679                firstmidnick = g_strdup (p);
680        }
681
682        /*
683         * Create our new reordered version of the name.
684         */
685#define NULLSTR(a) ((a) == NULL ? "" : (a))
686        newfull = g_strdup_printf ("%s %s %s %s", NULLSTR (prefix), NULLSTR (firstmidnick),
687                                   NULLSTR (last), NULLSTR (suffix));
688        g_strstrip (newfull);
689        g_free (name->full);
690        name->full = newfull;
691
692
693        g_free (prefix);
694        g_free (firstmidnick);
695        g_free (last);
696        g_free (suffix);
697}
698
/*
 * If *str is an empty string, free it, set it to NULL and reset *idx
 * to -1.  NULL and non-empty strings are left alone.
 */
static void
e_name_western_zap_nil (char **str, int *idx)
{
        if (*str == NULL || strlen (*str) != 0)
                return;

        g_free (*str);
        *str = NULL;
        *idx = -1;
}
712
/*
 * Body shared by the CHECK_MIDDLE_NAME_FOR_CONJUNCTION* macros below;
 * it expects `name' and `idxs' to be in scope and must be expanded at
 * the start of a block.
 *
 * The first name becomes "<first> <middle> <first word of last>".
 * Whatever follows the first space of the last name becomes the new
 * last name; if the last name has no space (or is unset) there is no
 * last name afterwards.  The middle name is cleared either way.
 */
#define FINISH_CHECK_MIDDLE_NAME_FOR_CONJUNCTION        \
        char *split_at = NULL;  \
        char *rest_of_last = NULL;      \
        char *joined_first;     \
        \
        if (name->last != NULL) \
                split_at = strchr (name->last, ' ');    \
        \
        if (split_at != NULL) { \
                rest_of_last = g_strdup (split_at + 1); \
                *split_at = '\0';       \
                idxs->last_idx += (split_at - name->last) + 1;  \
        } else {        \
                idxs->last_idx = -1;    \
        }       \
        \
        joined_first = g_strdup_printf ("%s %s %s", name->first, name->middle, name->last);     \
        \
        g_free (name->first);   \
        g_free (name->middle);  \
        g_free (name->last);    \
        \
        name->first = joined_first;     \
        name->middle = NULL;    \
        name->last = rest_of_last;      \
        idxs->middle_idx = -1;

/* Apply the merge above when the middle name is exactly @conj. */
#define CHECK_MIDDLE_NAME_FOR_CONJUNCTION(conj) \
        if (idxs->middle_idx != -1 && strcmp (name->middle, conj) == 0) {       \
                FINISH_CHECK_MIDDLE_NAME_FOR_CONJUNCTION        \
        }

/* Same as above, but compares the middle name ignoring case. */
#define CHECK_MIDDLE_NAME_FOR_CONJUNCTION_CASE(conj) \
        if (idxs->middle_idx != -1 && strcasecmp (name->middle, conj) == 0) {   \
                FINISH_CHECK_MIDDLE_NAME_FOR_CONJUNCTION        \
        }
761
762static void
763e_name_western_fixup (ENameWestern *name, ENameWesternIdxs *idxs)
764{
765        /*
766         * The middle and last names cannot be the same.
767         */
768        if (idxs->middle_idx != -1 && idxs->middle_idx == idxs->last_idx) {
769                idxs->middle_idx = -1;
770                g_free (name->middle);
771                name->middle = NULL;
772        }
773
774        /*
775         * If we have a middle name and no last name, then we mistook
776         * the last name for the middle name.
777         */
778        if (idxs->last_idx == -1 && idxs->middle_idx != -1) {
779                idxs->last_idx   = idxs->middle_idx;
780                name->last       = name->middle;
781                name->middle     = NULL;
782                idxs->middle_idx = -1;
783        }
784
785        /*
786         * Check to see if we accidentally included the suffix in the
787         * last name.
788         */
789        if (idxs->suffix_idx != -1 && idxs->last_idx != -1 &&
790            idxs->suffix_idx < (idxs->last_idx + strlen (name->last))) {
791                char *sfx;
792
793                sfx = name->last + (idxs->suffix_idx - idxs->last_idx);
794                if (sfx != NULL) {
795                        char *newlast;
796                        char *p;
797
798                        p = sfx - 1;
799                        while (isspace ((unsigned char)*p) && p > name->last)
800                                p --;
801                        p ++;
802
803                        newlast = g_malloc0 (p - name->last + 1);
804                        strncpy (newlast, name->last, p - name->last);
805                        g_free (name->last);
806                        name->last = newlast;
807                }
808        }
809
810        /*
811         * If we have a prefix and a first name, but no last name,
812         * then we need to assign the first name to the last name.
813         * This way we get things like "Mr Friedman" correctly.
814         */
815        if (idxs->first_idx != -1 && idxs->prefix_idx != -1 &&
816            idxs->last_idx == -1) {
817                name->last      = name->first;
818                idxs->last_idx  = idxs->first_idx;
819                idxs->first_idx = -1;
820                name->first     = NULL;
821        }
822
823        if (idxs->middle_idx != -1) {
824                CHECK_MIDDLE_NAME_FOR_CONJUNCTION ("&");
825                CHECK_MIDDLE_NAME_FOR_CONJUNCTION ("*");
826                CHECK_MIDDLE_NAME_FOR_CONJUNCTION ("|");
827                CHECK_MIDDLE_NAME_FOR_CONJUNCTION ("^");
828                CHECK_MIDDLE_NAME_FOR_CONJUNCTION ("&&");
829                CHECK_MIDDLE_NAME_FOR_CONJUNCTION ("||");
830                CHECK_MIDDLE_NAME_FOR_CONJUNCTION ("+");
831                CHECK_MIDDLE_NAME_FOR_CONJUNCTION ("-");
832                CHECK_MIDDLE_NAME_FOR_CONJUNCTION_CASE ("and");
833                CHECK_MIDDLE_NAME_FOR_CONJUNCTION_CASE ("or");
834                CHECK_MIDDLE_NAME_FOR_CONJUNCTION_CASE ("plus");
835
836                /* Spanish */
837                CHECK_MIDDLE_NAME_FOR_CONJUNCTION_CASE ("y");
838
839                /* German */
840                CHECK_MIDDLE_NAME_FOR_CONJUNCTION_CASE ("und");
841
842                /* Italian */
843                CHECK_MIDDLE_NAME_FOR_CONJUNCTION_CASE ("e");
844
845                /* Czech */
846                CHECK_MIDDLE_NAME_FOR_CONJUNCTION_CASE ("a");
847
848                /* Finnish */
849                CHECK_MIDDLE_NAME_FOR_CONJUNCTION_CASE ("ja");
850
851                /* French */
852                CHECK_MIDDLE_NAME_FOR_CONJUNCTION_CASE ("et");
853
854                /* Russian */
855                CHECK_MIDDLE_NAME_FOR_CONJUNCTION ("\xd0\x98"); /* u+0418 */
856                CHECK_MIDDLE_NAME_FOR_CONJUNCTION ("\xd0\xb8"); /* u+0438 */
857        }
858
859        /*
860         * Remove stray spaces and commas (although there don't seem
861         * to be any in the test cases, they might show up later).
862         */
863        e_name_western_cleanup_string (& name->prefix);
864        e_name_western_cleanup_string (& name->first);
865        e_name_western_cleanup_string (& name->middle);
866        e_name_western_cleanup_string (& name->nick);
867        e_name_western_cleanup_string (& name->last);
868        e_name_western_cleanup_string (& name->suffix);
869
870        /*
871         * Make zero-length strings just NULL.
872         */
873        e_name_western_zap_nil (& name->prefix, & idxs->prefix_idx);
874        e_name_western_zap_nil (& name->first,  & idxs->first_idx);
875        e_name_western_zap_nil (& name->middle, & idxs->middle_idx);
876        e_name_western_zap_nil (& name->nick,   & idxs->nick_idx);
877        e_name_western_zap_nil (& name->last,   & idxs->last_idx);
878        e_name_western_zap_nil (& name->suffix, & idxs->suffix_idx);
879}
880
881/**
882 * e_name_western_western_parse_fullname:
883 * @full_name: A string containing a Western name.
884 *
885 * Parses @full_name and returns an #ENameWestern object filled with
886 * the component parts of the name.
887 */
888ENameWestern *
889e_name_western_parse (const char *full_name)
890{
891        ENameWesternIdxs *idxs;
892        ENameWestern *wname;
893
894        wname = g_new0 (ENameWestern, 1);
895
896        wname->full = g_strdup (full_name);
897
898        idxs = g_new0 (ENameWesternIdxs, 1);
899
900        idxs->prefix_idx = -1;
901        idxs->first_idx  = -1;
902        idxs->middle_idx = -1;
903        idxs->nick_idx   = -1;
904        idxs->last_idx   = -1;
905        idxs->suffix_idx = -1;
906       
907        /*
908         * An extremely simple algorithm.
909         *
910         * The goal here is to get it right 95% of the time for
911         * Western names.
912         *
913         * First we check to see if this is an ass-backwards name
914         * ("Prefix Last, First Middle Suffix").  These names really
915         * suck (imagine "Dr von Johnson, Albert Roderick Jr"), so
916         * we reorder them first and then parse them.
917         *
918         * Next, we grab the most obvious assignments for the various
919         * parts of the name.  Once this is done, we check for stupid
920         * errors and fix them up.
921         */
922        e_name_western_reorder_asshole  (wname, idxs);
923
924        e_name_western_extract_prefix   (wname, idxs);
925        e_name_western_extract_first    (wname, idxs);
926        e_name_western_extract_nickname (wname, idxs);
927        e_name_western_extract_middle   (wname, idxs);
928        e_name_western_extract_last     (wname, idxs);
929        e_name_western_extract_suffix   (wname, idxs);
930
931        e_name_western_fixup            (wname, idxs);
932
933        g_free (idxs);
934
935        return wname;
936}
937
938/**
939 * e_name_western_free:
940 * @name: An ENameWestern object which needs to be freed.
941 *
942 * Deep-frees @name
943 */
944void
945e_name_western_free (ENameWestern *w)
946{
947
948        g_free (w->prefix);
949        g_free (w->first);
950        g_free (w->middle);
951        g_free (w->nick);
952        g_free (w->last);
953        g_free (w->suffix);
954       
955        g_free (w->full);
956
957        g_free (w);
958}
Note: See TracBrowser for help on using the repository browser.