Apply big patch from rn E. Hansen.

This commit is contained in:
Jim Meyering
1997-10-13 02:08:01 +00:00
parent 168acf8b9c
commit 79bfec7ced
+622 -54
View File
@@ -19,6 +19,16 @@
The author may be reached (Email) at the address mike@gnu.ai.mit.edu,
or (US mail) as Mike Haertel c/o Free Software Foundation. */
/* NLS addition added 1997 by Ørn E. Hansen.
Who can be reached at (e-mail) oehansen@daimi.aau.dk,
oe.hansen@halmstad.mail.telia.com
The additions made to allow NLS for sorting, is free software
and can be freely distributed or modified, under the GNU general
public licence as published by the Free Software Foundation. */
#include <config.h>
/* Get isblank from GNU libc. */
@@ -36,6 +46,13 @@
#include "error.h"
#include "xstrtod.h"
#ifdef ENABLE_NLS
/* this may need some heading.... applies to Debian linux */
/* for reading the structur of _NL_ITEM... to get abreviated month */
/* names */
#include <langinfo.h>
#endif /* NLS */
#ifdef HAVE_LIMITS_H
# include <limits.h>
#else
@@ -52,6 +69,8 @@ void free ();
/* Undefine, to avoid warning about redefinition on some systems. */
#undef min
#define min(a, b) ((a) < (b) ? (a) : (b))
#undef max
#define max(a, b) ((a) > (b) ? (a) : (b))
#define UCHAR_LIM (UCHAR_MAX + 1)
#define UCHAR(c) ((unsigned char) (c))
@@ -67,6 +86,52 @@ void free ();
status code greater than 1. */
#define SORT_FAILURE 2
/* Some character constants used in the program. Better do assign */
/* these globally. Makes the program a little more readable. */
static unsigned char decimal_point = '.';
static unsigned char th_sep = ',';
static unsigned char *nls_grouping = "\003\003";
#define FLOATING_POINT '.'
#define FLOATING_COMMA ','
#define NEGATIVE_SIGN '-'
#define NUMERIC_ZERO '0'
#define CHARS_IN_ABM 3
static int need_locale = 0; /* This is "C" locale, need another? */
static int nls_fraction_found = 1; /* Should we look for decimal point? */
static int nls_month_found = 1; /* Look for month notations in text? */
/* If native language support is requested, make a 1-1 map to the */
/* locale character map, otherwise ensure normal behaviour */
#ifdef ENABLE_NLS
#define NLS_KEY_LIMIT 30 /* Keys have limited length */
#define NLS_NUM_MONTHS 12 /* 12 months in a year */
#define NLS_MAX_GROUPS 8 /* Maximum number of groups */
/* A string with one character, to enforce char collation */
#define NLS_ONE_CHARACTER_STRING " "
/* Two buffers, specificly used to get a one-one map of the table */
/* used under inittables. */
unsigned char *nls_temp_buf1, *nls_temp_buf2;
/* Create a map, that maps the characters in the "C" locale */
/* 1 - 1 to the locale view of character order */
unsigned char nls_locale_map[UCHAR_LIM];
/* A definition to map each character through the above translation */
/* table, during sort. */
#define NLS_MAP(c) UCHAR(c)
#else
/* No NLS the character value itself, represents the sorting order */
#define NLS_MAP(c) UCHAR(c)
#endif
/* The kind of blanks for '-b' to skip in various options. */
enum blanktype { bl_start, bl_end, bl_both };
@@ -143,7 +208,7 @@ static char fold_toupper[UCHAR_LIM];
/* Table mapping 3-letter month names to integers.
Alphabetic order allows binary search. */
static struct month const monthtab[] =
static struct month us_monthtab[] =
{
{"APR", 4},
{"AUG", 8},
@@ -159,6 +224,23 @@ static struct month const monthtab[] =
{"SEP", 9}
};
#ifdef ENABLE_NLS
/* Locale may have a different idea of month names */
static struct month nls_monthtab[NLS_NUM_MONTHS];
static int nls_months_collide[NLS_NUM_MONTHS+1];
/* Numeric keys, to search for numeric format */
static struct nls_keyfield {
struct keyfield *key;
struct nls_keyfield *next;
} *nls_keyhead = NULL;
#endif
/* Which month table to use in the program, default C */
static struct month *monthtab = us_monthtab;
/* During the merge phase, the number of files to merge at once. */
#define NMERGE 16
@@ -246,7 +328,7 @@ for that key. If no key given, use the entire line as key. With no\n\
FILE, or when FILE is -, read standard input.\n\
")
, DEFAULT_TMPDIR);
puts (_("\nReport bugs to <textutils-bugs@gnu.org>."));
puts (_("\nReport bugs to textutils-bugs@gnu.ai.mit.edu"));
}
/* Don't use EXIT_FAILURE here in case it is defined to be 1.
POSIX requires that sort return 1 IFF invoked with -c and
@@ -445,8 +527,39 @@ zaptemp (char *name)
}
}
#ifdef ENABLE_NLS
/* Initialize the character class tables. */
static int nls_sort_month_comp(struct month *m1, struct month *m2)
{
return strcoll(m1->name, m2->name);
}
/* strncoll(a, b, l) */
/* do collation on strings a and b, but for at most l characters */
/* we use the fact, that we KNOW that l is the min of the two lengths */
/* and we make use of the fact, that collation on chars has already */
/* been done and is stored in NLS_MAP */
static int strncoll(unsigned char *s1, unsigned char *s2, int l)
{
register int diff = 0;
if (need_locale) {
/* Let's emulate a strncoll() function, by forcing strcoll() */
/* to compare only l characters in both strings. */
register unsigned char n1=s1[l],n2=s2[l];
s1[l]=s2[l]=0;
diff = strcoll(s1, s2);
s1[l]=n1;
s2[l]=n2;
} else
diff = memcmp(s1, s2, l);
return diff;
}
#endif /* NLS */
static void
inittables (void)
{
@@ -465,6 +578,33 @@ inittables (void)
else
fold_toupper[i] = i;
}
#ifdef ENABLE_NLS
/* If We're not in the "C" locale, we gotta read in different */
/* names for months. */
if (need_locale) {
unsigned char *s;
int j;
int (*comp)() = nls_sort_month_comp;
nls_months_collide[0] = 1; /* if an error, look again */
for (i = 0; i < NLS_NUM_MONTHS; i++) {
s = nl_langinfo(_NL_ITEM(LC_TIME, ABMON_1+us_monthtab[i].val-1));
nls_monthtab[i].name = strdup(s);
nls_monthtab[i].val = us_monthtab[i].val;
/* It has been pointed out, that abreviated month names */
/* may be longer than the usual 3 characters */
for(j=0;j<strlen(s);j++) nls_monthtab[i].name[j] = fold_toupper[s[j]];
nls_months_collide[nls_monthtab[i].val] = (strncmp(nls_monthtab[i].name, us_monthtab[i].name, CHARS_IN_ABM) == 0);
}
/* Now quicksort the month table (should be sorted already!) */
/* However, another locale doesn't rule out the possibility */
/* of a different order of month names. */
qsort((void *)nls_monthtab, NLS_NUM_MONTHS, sizeof(struct month), comp);
monthtab = nls_monthtab;
}
#endif /* NLS */
}
/* Initialize BUF, allocating ALLOC bytes initially. */
@@ -754,13 +894,86 @@ findlines (struct buffer *buf, struct lines *lines)
should begin with a decimal point followed immediately by the digits
of the fraction. Strings not of this form are considered to be zero. */
/* The goal here, is to take two numbers a and b... compare these
in parallel. Instead of converting each, and then comparing the
outcome. Most likely stopping the comparison before the conversion
is complete. The algorithm used, in the old sort:
Algorithm: fraccompare
Action : compare two decimal fractions
accepts : char *a, char *b
returns : -1 if a<b, 0 if a=b, 1 if a>b.
implement:
if *a == decimal_point AND *b == decimal_point
find first character different in a and b.
if both are digits, return the difference *a - *b.
if *a is a digit
skip past zeroes
if digit return 1, else 0
if *b is a digit
skip past zeroes
if digit return -1, else 0
if *a is a decimal_point
skip past decimal_point and zeroes
if digit return 1, else 0
if *b is a decimal_point
skip past decimal_point and zeroes
if digit return -1, else 0
return 0
As can be clearly seen, the above implementation duplicates code,
and thus there is place for improvement:
the difference in code of a and b, is solved by using a
refernce to s, assigned to either a or b. and using n
to denote return value.
the difference in either that start being a digit or
the decimal point, is solved by testing if either is
a decimal point, or if the other is a digit...
if *a or *b is a decimal_point
skip all chars where *a == *b
if *a and *b are digits return *a - *b
s is b, and return code is -1
if *a is a digit or *a is a decimal_pointm then s is a, return code 1
skip decimal_point in s
skip zeroes in s
if *s is a digit, return n
return 0 */
#ifdef ENABLE_NLS
static int fraccompare(register const char *a, register const char *b)
{
register const char *s;
int n = -1;
if (!nls_fraction_found) nls_fraction_found=1;
if (*a == decimal_point || *b == decimal_point) {
if (*a == *b)
do {
++a, ++b;
} while (*a == *b && ISDIGIT(*a));
if (ISDIGIT(*a) && ISDIGIT(*b))
return (*a) - (*b);
s = b;
if (*a==decimal_point || (ISDIGIT(*a) && *b!=decimal_point))
s = a, n=1;
if (*s == decimal_point) ++s;
while (*s == NUMERIC_ZERO) ++s;
if (ISDIGIT(*s)) return n;
}
return 0;
}
#else
static int
fraccompare (register const char *a, register const char *b)
{
register int tmpa = *a;
register int tmpb = *b;
if (tmpa == '.' && tmpb == '.')
if (tmpa == decimal_point && tmpb == decimal_point)
{
do
tmpa = *++a, tmpb = *++b;
@@ -769,15 +982,15 @@ fraccompare (register const char *a, register const char *b)
return tmpa - tmpb;
if (ISDIGIT (tmpa))
{
while (tmpa == '0')
while (tmpa == NUMERIC_ZERO)
tmpa = *++a;
if (ISDIGIT (tmpa))
return 1;
return 0;
}
if (ISDIGIT (tmpb))
if (digits[tmpb])
{
while (tmpb == '0')
while (tmpb == NUMERIC_ZERO)
tmpb = *++b;
if (ISDIGIT (tmpb))
return -1;
@@ -785,31 +998,224 @@ fraccompare (register const char *a, register const char *b)
}
return 0;
}
else if (tmpa == '.')
else if (tmpa == decimal_point)
{
do
tmpa = *++a;
while (tmpa == '0');
while (tmpa == NUMERIC_ZERO);
if (ISDIGIT (tmpa))
return 1;
return 0;
}
else if (tmpb == '.')
else if (tmpb == decimal_point)
{
do
tmpb = *++b;
while (tmpb == '0');
while (tmpb == NUMERIC_ZERO);
if (ISDIGIT (tmpb))
return -1;
return 0;
}
return 0;
}
#endif
/* Compare strings A and B as numbers without explicitly converting them to
machine numbers. Comparatively slow for short strings, but asymptotically
hideously fast. */
/* The code here, is like the above... continuous reoccurrance of the
same code... improved 15-JAN-1997 in connection with native languages
support */
#ifdef ENABLE_NLS
/* Decide the kind of fraction the program will use */
static int nls_set_fraction(register unsigned char ch)
{
if (!nls_fraction_found && ch != decimal_point)
if (ch == FLOATING_POINT) { /* US style */
decimal_point = FLOATING_POINT;
th_sep = FLOATING_COMMA;
} else if (ch == FLOATING_COMMA) { /* EU style */
decimal_point = FLOATING_COMMA;
th_sep = FLOATING_POINT;
} else if (ch != decimal_point) { /* Alien */
decimal_point = ch;
th_sep = '\0';
}
return nls_fraction_found=1;
}
/* Look for a fraction
It ain't as simple as it looks... however, consider a number:
1.234,00
1,234.00
It's easy to tell which is a decimal point, and which isn't. We use
the grouping iformation to find out how many digits are grouped together
for thousand seperator.
The idea here, is to use the grouping information... but not to
spend time with verifying the groups... not too much time, anyway.
so, a number represented to us as:
1.234.567,89
will be taken and seperated into different groups, seperated by a
seperator character (Decimal point or thousands seperator).
{1,234,567}
these are the groups of digits that lead to a seperator character,
and with the trailing group is added:
{1,234,567,89}
resulting in 4 groups of numbers. If the resulting number of groups,
are none, or just 1... this is not enough to decide anything about
the decimal point. We need at least two for that. With two groups
we have at least one seperator. That seperator can be a decimal
point, or a thousands seperator... if it is a thousands seperator
the number of digits in the last group, will comply with the first
rule in the grouping rule for numeric values. i.e.
|{89}| = grouping[0]
if so, and there are only two groups of numbers, the value cannot
be determined. If there are three or more numbers, the seperator
seperating the groups is checked. If these are the same, the
character is determined to be a thousands seperator. If they are
not the same, the last seperator is determined to be a decimal
point. If checking the grouping rules, we find out that there
are no grouping rules defined, either the grouping rules is NULL
or the first grouping number is 0, then the locale format is used.
We try to take an advantage of a special situation. If the trailing
group, the one that normally should be the fractional part, turns
out to have the same length as the thousands seperator rule says,
making a doubt on that it may be a decimal point, we look for the
group before that, i.e. with a two group form:
{1234,567}
where the grouping rule is 3;3... we take a look at group 1, and find
out that |{1234}| > larger of the two first grouping rules, then
the seperator has to be a decimal point...
*/
static int look_for_fraction(unsigned char *s, unsigned char *e)
{
/* I don't think it's reasonable to think of more than 6 groups */
register unsigned char *p=s, n=0;
unsigned short groups[NLS_MAX_GROUPS];
/* skip blanks and signs */
while(blanks[*s] || *s == NEGATIVE_SIGN) s++;
/* groups = {}, n = 0 */
for(;p < e;p++) {
/* groups[n]={number of digits leading to seperator n}
n = number of seperators so far */
if (*p == decimal_point || *p == th_sep || *p == FLOATING_POINT) {
if (++n >= NLS_MAX_GROUPS) return; /* WOW! BIG Number... */
groups[n] = (short)(p - s), s=p+1;
} else if (!ISDIGIT(*p)) break;
/* mem[s..p]=digits only */
}
/* n = number of seperators in s..e */
groups[++n]=(short)(p - s);
/* n = groups in the number */
if (n <= 1) return 0; /* Only one group of numbers... not enough */
p = nls_grouping;
/* p = address of group rules
s = address of next character after seperator */
s = s - 1; /* s = address of last seperator */
if (p && *p) {
/* a legal trailing group, iff groups[n] == first rule */
if (groups[n] != (short)*p) return nls_set_fraction(*s);
if (n == 2) { /* Only two groups */
if (groups[n-1] > max(p[0],p[1]))
return nls_set_fraction(*s);
return 0;
}
/* if the seperators are the same, it's a thousands */
if (*s != *(s - groups[n]))
return nls_set_fraction(*s);
/* s[0] = thousands seperator */
if (*s == FLOATING_COMMA)
return nls_set_fraction(FLOATING_POINT);
return nls_fraction_found=1;
} else { /* no grouping allowed here, last seperator IS decimal point */
return nls_set_fraction(*s);
}
return 0;
}
static int
numcompare (register const unsigned char *a, register const unsigned char *b)
{
int ret_code = 1; /* normal return status, see later in code */
int diff = 0; /* difference between two digits */
while (blanks[*a]) ++a;
while (blanks[*b]) ++b;
/* next character in a,b is non-blank */
if ((*a == NEGATIVE_SIGN || *b == NEGATIVE_SIGN) && *a != *b) {
/* a < 0, or b < 0, but not both */
if (*a == NEGATIVE_SIGN) ret_code = -1, ++a; /* a looks < b */
else if (*b == NEGATIVE_SIGN) ret_code = 1, ++b; /* b looks < a */
/* bypass zeroes, decimal points, and thousand sep in a & b */
while (*a == NUMERIC_ZERO ||(th_sep && *a == th_sep)|| *a == decimal_point)
++a;
while (*b == NUMERIC_ZERO ||(th_sep && *b == th_sep)|| *b == decimal_point)
++b;
if (ISDIGIT(*a) || ISDIGIT(*b))
/* here, either a or b or both are digits
if a and b are digits, the signed one is the lesser.
if a is a digit, and not b.. it means b==0, and if b==0
than either is signed if b is signed then -0 < a
or if a is signed then -a < 0. The ret_code is already set
to mark that the signed number is the lesser, so we just
return that number here. */
return ret_code;
/* *a and *b are neither digits, they are equal -0 == +0 */
return 0;
} else {
/* either both numbers are signed, or both are not-signed */
if (*a == NEGATIVE_SIGN) ++a, ++b, ret_code=-1;
/* if both are signed, then remember -100 < -10 (ret_code reversed!) */
/* Skip any leading zeroes */
while (*a == NUMERIC_ZERO) ++a;
while (*b == NUMERIC_ZERO) ++b;
continue_thousands:
/* skip all equal digits */
while (ISDIGIT(*a) && ISDIGIT(*b) && *a == *b)
a++, b++;
/* Here, we have either different digits, or possible fractions
or thousand seperators. */
if (ISDIGIT(*a) && ISDIGIT(*b)) {
if (diff == 0)
diff = ((*a) - (*b)); /* simple, isn't it? not quite */
a++, b++;
goto continue_thousands;
}
/* now, here either may be a fraction, or a thousand seperator...
or both. */
/* We've decided what are decimal_points, and what are thousands sep */
if ((th_sep != 0) && (*a == th_sep || *b == th_sep)) {
if (*a == th_sep) ++a;
if (*b == th_sep) ++b;
goto continue_thousands; /* Ugly, but better than a while(1) */
}
if (ISDIGIT(*a)) return ret_code * 1; /* a has more digits than b */
if (ISDIGIT(*b)) return ret_code * -1; /* b has more digits than a */
/* now, we should have the fractions solved */
if ((diff == 0) && (*a == decimal_point || *b == decimal_point))
return ret_code * fraccompare(a, b);
return diff; /* fall through here, and diff decides */
}
}
#else
static int
numcompare (register const char *a, register const char *b)
{
@@ -823,48 +1229,47 @@ numcompare (register const char *a, register const char *b)
while (blanks[tmpb])
tmpb = UCHAR (*++b);
if (tmpa == '-')
if (tmpa == NEGATIVE_SIGN)
{
do
tmpa = *++a;
while (tmpa == '0');
if (tmpb != '-')
tmpa = UCHAR (*++a);
while (tmpa == NUMERIC_ZERO);
if (tmpb != NEGATIVE_SIGN)
{
if (tmpa == '.')
if (tmpa == decimal_point)
do
tmpa = *++a;
while (tmpa == '0');
while (tmpa == NUMERIC_ZERO);
if (ISDIGIT (tmpa))
return -1;
while (tmpb == '0')
tmpb = *++b;
if (tmpb == '.')
while (tmpb == NUMERIC_ZERO)
tmpb = UCHAR (*++b);
if (tmpb == decimal_point)
do
tmpb = *++b;
while (tmpb == '0');
while (tmpb == NUMERIC_ZERO);
if (ISDIGIT (tmpb))
return -1;
return 0;
}
do
tmpb = *++b;
while (tmpb == '0');
tmpb = UCHAR (*++b);
while (tmpb == NUMERIC_ZERO);
while (tmpa == tmpb && ISDIGIT (tmpa))
tmpa = *++a, tmpb = *++b;
while (tmpa == tmpb && digits[tmpa])
tmpa = UCHAR (*++a), tmpb = UCHAR (*++b);
if ((tmpa == '.' && !ISDIGIT (tmpb))
|| (tmpb == '.' && !ISDIGIT (tmpa)))
if ((tmpa == decimal_point && !ISDIGIT (tmpb)) || (tmpb == decimal_point && !ISDIGIT (tmpa)))
return -fraccompare (a, b);
if (ISDIGIT (tmpa))
for (loga = 1; ISDIGIT (*++a); ++loga)
for (loga = 1; ISDIGIT (UCHAR (*++a)); ++loga)
;
else
loga = 0;
if (ISDIGIT (tmpb))
for (logb = 1; ISDIGIT (*++b); ++logb)
for (logb = 1; ISDIGIT (UCHAR (*++b)); ++logb)
;
else
logb = 0;
@@ -877,49 +1282,48 @@ numcompare (register const char *a, register const char *b)
return tmpb - tmpa;
}
else if (tmpb == '-')
else if (tmpb == NEGATIVE_SIGN)
{
do
tmpb = *++b;
while (tmpb == '0');
if (tmpb == '.')
tmpb = UCHAR (*++b);
while (tmpb == NUMERIC_ZERO);
if (tmpb == decimal_point)
do
tmpb = *++b;
while (tmpb == '0');
while (tmpb == NUMERIC_ZERO);
if (ISDIGIT (tmpb))
return 1;
while (tmpa == '0')
tmpa = *++a;
if (tmpa == '.')
while (tmpa == NUMERIC_ZERO)
tmpa = UCHAR (*++a);
if (tmpa == decimal_point)
do
tmpa = *++a;
while (tmpa == '0');
tmpa = UCHAR (*++a);
while (tmpa == NUMERIC_ZERO);
if (ISDIGIT (tmpa))
return 1;
return 0;
}
else
{
while (tmpa == '0')
tmpa = *++a;
while (tmpb == '0')
tmpb = *++b;
while (tmpa == NUMERIC_ZERO)
tmpa = UCHAR (*++a);
while (tmpb == NUMERIC_ZERO)
tmpb = UCHAR (*++b);
while (tmpa == tmpb && ISDIGIT (tmpa))
tmpa = *++a, tmpb = *++b;
tmpa = UCHAR (*++a), tmpb = UCHAR (*++b);
if ((tmpa == '.' && !ISDIGIT (tmpb))
|| (tmpb == '.' && !ISDIGIT (tmpa)))
if ((tmpa == decimal_point && !ISDIGIT (tmpb)) || (tmpb == decimal_point && !ISDIGIT (tmpa)))
return fraccompare (a, b);
if (ISDIGIT (tmpa))
for (loga = 1; ISDIGIT (*++a); ++loga)
for (loga = 1; ISDIGIT (UCHAR (*++a)); ++loga)
;
else
loga = 0;
if (ISDIGIT (tmpb))
for (logb = 1; ISDIGIT (*++b); ++logb)
for (logb = 1; ISDIGIT (UCHAR (*++b)); ++logb)
;
else
logb = 0;
@@ -933,6 +1337,7 @@ numcompare (register const char *a, register const char *b)
return tmpa - tmpb;
}
}
#endif
static int
general_numcompare (const char *sa, const char *sb)
@@ -967,20 +1372,42 @@ getmonth (const char *s, int len)
if (len < 3)
return 0;
for (i = 0; i < 3; ++i)
for (i = 0; i < CHARS_IN_ABM; ++i)
month[i] = fold_toupper[UCHAR (s[i])];
month[3] = '\0';
while (hi - lo > 1)
while (hi - lo > 1) {
#ifdef ENABLE_NLS
if (strcoll (month, monthtab[(lo+hi)/2].name) < 0)
#else
if (strcmp (month, monthtab[(lo + hi) / 2].name) < 0)
#endif
hi = (lo + hi) / 2;
else
lo = (lo + hi) / 2;
}
if (!strcmp (month, monthtab[lo].name))
return monthtab[lo].val;
return 0;
}
#ifdef ENABLE_NLS
/* Look for the month in locale table, and if that fails try with
us month name table */
static int nls_month_is_either_locale(const char *s, int len)
{
int ind;
monthtab = nls_monthtab;
ind = getmonth(s, len);
if (ind == 0) {
monthtab = us_monthtab;
ind = getmonth(s, len);
}
return ind;
}
#endif
/* Compare two lines A and B trying every key in sequence until there
are no more keys or a difference is found. */
@@ -1082,11 +1509,60 @@ keycompare (const struct line *a, const struct line *b)
}
else if (key->month)
{
#ifdef ENABLE_NLS
/* if we haven't decided which locale to go with, we get the
month name from either. If either month name is fully
solved and the month name doesn't collide with the other
locale... then use that table from there forward */
if (!nls_month_found) {
int x;
x = nls_month_is_either_locale(texta, lena);
if (nls_month_found = !nls_months_collide[x])
diff = x - getmonth(textb, lenb);
else {
diff = nls_month_is_either_locale(textb, lenb);
nls_month_found = !nls_months_collide[diff];
diff = x - diff;
}
} else
#endif
diff = getmonth (texta, lena) - getmonth (textb, lenb);
if (diff)
return key->reverse ? -diff : diff;
continue;
}
#ifdef ENABLE_NLS
/* This sorting may become slow, so in a simple locale */
/* The user can select a faster sort, that is similar */
/* to ascii sort, but 8-bit instead of 7-bit. But */
/* can't handle more complex, combined, character sets */
else if (need_locale) {
unsigned char copy_a[lena+1], copy_b[lenb+1];
int la, lb, i;
/* we can't just go strcoll() the two strings, but */
/* must extract the text for the key, and do the */
/* proper 'ignore' and 'translate' before comparing */
for(la=lb=i=0;i<max(lena,lenb);i++) {
if (i < lena) {
copy_a[la]=translate?translate[UCHAR(texta[i])]:texta[i];
la = ignore?(ignore[UCHAR(texta[i])]?la:la+1):la+1;
}
if (i < lenb) {
copy_b[lb]=translate?translate[UCHAR(textb[i])]:textb[i];
lb = ignore?(ignore[UCHAR(textb[i])]?lb:lb+1):lb+1;
}
}
copy_a[la]=copy_b[lb]=0;
diff = strcoll(copy_a, copy_b);
if (diff)
return key->reverse? -diff:diff;
continue;
}
#endif
else if (ignore && translate)
#define CMP_WITH_IGNORE(A, B) \
@@ -1102,7 +1578,7 @@ keycompare (const struct line *a, const struct line *b)
{ \
if ((A) != (B)) \
{ \
diff = (A) - (B); \
diff = NLS_MAP(A) - NLS_MAP(B); \
break; \
} \
++texta; \
@@ -1144,13 +1620,21 @@ keycompare (const struct line *a, const struct line *b)
{
if (translate[UCHAR (*texta++)] != translate[UCHAR (*textb++)])
{
diff = (translate[UCHAR (*--texta)]
- translate[UCHAR (*--textb)]);
diff = (NLS_MAP(translate[UCHAR (*--texta)])
- NLS_MAP(translate[UCHAR (*--textb)]));
break;
}
}
else
#ifndef ENABLE_NLS
diff = memcmp (texta, textb, min (lena, lenb));
#else
/* since we don't have a strncoll, should one be emulated? */
/* as the normal behaviour of the sort program, when two */
/* equivalent keys are met, is to sort according to length */
diff = strncoll (texta, textb, min(lena, lenb));
#endif
if (diff)
return key->reverse ? -diff : diff;
@@ -1191,10 +1675,18 @@ compare (register const struct line *a, register const struct line *b)
{
char *ap = a->text, *bp = b->text;
diff = UCHAR (*ap) - UCHAR (*bp);
#ifdef ENABLE_NLS
if (need_locale) /* want absolutely correct sorting */
return reverse ? -strcoll(ap, bp) : strcoll(ap, bp);
#endif
diff = NLS_MAP (*ap) - NLS_MAP (*bp);
if (diff == 0)
{
#ifdef ENABLE_NLS
diff = strncoll (ap, bp, mini);
#else
diff = memcmp (ap, bp, mini);
#endif
if (diff == 0)
diff = tmpa - tmpb;
}
@@ -1469,6 +1961,41 @@ mergefps (FILE **fps, register int nfps, FILE *ofp)
}
}
#ifdef ENABLE_NLS
/*
* Let's go into a frenzy and find the numeric format that this file
* represents to us for sorting.
*/
nls_numeric_format(const struct line *line, int nlines)
{
struct keyfield *key;
struct nls_keyfield *n_key = nls_keyhead;
int iter = 0;
unsigned char *text, *lim;
for(;!nls_fraction_found && nlines>0;line++,nlines--)
for(iter=0;!nls_fraction_found;++iter) {
key = n_key->key;
if (iter || line->keybeg == NULL) {
if (key->eword >= 0)
lim = limfield(line, key);
else
lim = line->text + line->length;
if (key->sword >= 0)
text = begfield(line, key);
else
text = line->text;
} else
text = line->keybeg, lim = line->keylim;
look_for_fraction(text, lim);
if ((n_key = n_key->next) == nls_keyhead) break;
}
return nls_fraction_found=1;
}
#endif
/* Sort the array LINES with NLINES members, using TEMP for temporary space. */
static void
@@ -1603,6 +2130,12 @@ sort (char **files, int nfiles, FILE *ofp)
tmp = (struct line *)
xrealloc ((char *) tmp, ntmp * sizeof (struct line));
}
#ifdef ENABLE_NLS
if (nls_keyhead)
nls_keyhead = nls_keyhead->next;
if (!nls_fraction_found && nls_keyhead)
nls_numeric_format(lines.lines, lines.used);
#endif
sortlines (lines.lines, lines.used, tmp);
if (feof (fp) && !nfiles && !n_temp_files && !buf.left)
tfp = ofp;
@@ -1650,6 +2183,18 @@ insertkey (struct keyfield *key)
k = k->next;
k->next = key;
key->next = NULL;
if (key->numeric || key->general_numeric) {
struct nls_keyfield *nk;
nk = (struct nls_keyfield *)xmalloc(sizeof(struct nls_keyfield));
nk->key = key;
if (nls_keyhead) {
nk->next = nls_keyhead->next;
nls_keyhead->next = nk;
} else
nk->next = nk;
nls_keyhead = nk;
}
}
static void
@@ -1746,7 +2291,30 @@ main (int argc, char **argv)
#endif /* SA_INTERRUPT */
program_name = argv[0];
setlocale (LC_ALL, "");
#ifdef ENABLE_NLS
s = setlocale(LC_ALL, "");
if (strcmp(s, "C") && strcmp(s, "POSIX"))
need_locale = 1; /* Neither C nor POSIX, we need to initialize it */
/* Let's get locale's representation of the decimal point */
decimal_point = *( localeconv() )->decimal_point;
th_sep = *( localeconv() )->thousands_sep;
nls_grouping = ( localeconv() )->grouping;
/* if locale doesn't define a decimal point, we'll use the
US notation. */
if (decimal_point == 0)
decimal_point = FLOATING_POINT;
else
nls_fraction_found = 0; /* Figure out which decimal point to use */
nls_month_found = 0; /* Figure out which month notation to use */
monthtab = nls_monthtab;
#endif /* NLS */
bindtextdomain (PACKAGE, LOCALEDIR);
textdomain (PACKAGE);