test: strings.c Source File

00001 /****************************** Module Header ******************************\ 00002 * Module Name: strings.c 00003 * 00004 * Copyright (c) 1985 - 1999, Microsoft Corporation 00005 * 00006 * This module contains all the string handling APIs and functions. Since 00007 * they don't access server-specific data they belong here in the client DLL. 00008 * 00009 * History: 00010 * 10-18-90 DarrinM Created. 00011 \***************************************************************************/ 00012 00013 #include "precomp.h" 00014 #pragma hdrstop 00015 00016 /* LATER these should be in a public header file!!! 00017 * Assorted defines used to support the standard Windows ANSI code page 00018 * (now known as code page 1252 and officially registered by IBM). 00019 * This is intended only for the PDK release. Subsequent releases will 00020 * use the NLSAPI and Unicode. 00021 */ 00022 #define LATIN_CAPITAL_LETTER_A_GRAVE (CHAR)0xc0 00023 #define LATIN_CAPITAL_LETTER_THORN (CHAR)0xde 00024 #define LATIN_SMALL_LETTER_SHARP_S (CHAR)0xdf 00025 #define LATIN_SMALL_LETTER_Y_DIAERESIS (CHAR)0xff 00026 #define DIVISION_SIGN (CHAR)0xf7 00027 #define MULTIPLICATION_SIGN (CHAR)0xd7 00028 00029 00030 /***************************************************************************\ 00031 * CharLowerA (API) 00032 * 00033 * Convert either a single character or an entire string to lower case. The 00034 * two cases are differentiated by checking the high-word of psz. If it is 00035 * 0 then we just convert the low-word of psz. 00036 * 00037 * History: 00038 * 11-26-90 DarrinM Created non-NLS version. 00039 * 06-22-91 GregoryW Modified to support code page 1252. This is for 00040 * the PDK release only. After the PDK this routine 00041 * will be modified to use the NLSAPI. Also renamed 00042 * API to conform to new naming conventions. AnsiLower 00043 * is now a #define which resolves to this routine. 00044 \***************************************************************************/ 00045 00046 LPSTR WINAPI CharLowerA( 00047 LPSTR psz) 00048 { 00049 NTSTATUS st; 00050 00051 /* 00052 * Early out for NULL string or '\0' 00053 */ 00054 if (psz == NULL) { 00055 return psz; 00056 } 00057 00058 if (!IS_PTR(psz)) { 00059 WCHAR wch; 00060 00061 #ifdef FE_SB // CharLowerA() 00062 /* 00063 * if only DBCS Leadbyte was passed, just return the character. 00064 * Same behavior as Windows 3.1J and Windows 95 FarEast version. 00065 */ 00066 if (IS_DBCS_ENABLED() && IsDBCSLeadByte((BYTE)(ULONG_PTR)psz)) { 00067 return psz; 00068 } 00069 #endif // FE_SB 00070 00071 // 00072 // LATER 14 Feb 92 GregoryW 00073 // For DBCS code pages is a double byte character ever 00074 // passed in the low word of psz or is the high nibble 00075 // of the low word always ignored? 00076 // 00077 st = RtlMultiByteToUnicodeN(&wch, sizeof(WCHAR), NULL, (PCH)&psz, sizeof(CHAR)); 00078 if (!NT_SUCCESS(st)) { 00079 /* 00080 * Failed! Caller is not expecting failure, CharLowerA does not 00081 * have a failure indicator, so just return the original character. 00082 */ 00083 RIPMSG1(RIP_WARNING, "CharLowerA(%#p) failed\n", psz); 00084 } else { 00085 /* 00086 * The next two calls never fail. 00087 */ 00088 LCMapStringW(LOCALE_USER_DEFAULT, LCMAP_LOWERCASE, &wch, 1, &wch, 1); 00089 RtlUnicodeToMultiByteN((PCH)&psz, sizeof(CHAR), NULL, &wch, sizeof(WCHAR)); 00090 } 00091 return psz; 00092 00093 } 00094 00095 /* 00096 * psz is a null-terminated string 00097 */ 00098 CharLowerBuffA(psz, strlen(psz)+1); 00099 return psz; 00100 } 00101 00102 00103 /***************************************************************************\ 00104 * CharUpperA (API) 00105 * 00106 * Convert either a single character or an entire string to upper case. The 00107 * two cases are differentiated by checking the high-word of psz. If it is 00108 * 0 then we just convert the low-word of psz. 00109 * 00110 * History: 00111 * 12-03-90 IanJa derived from DarrinM's non-NLS AnsiLower 00112 * 06-22-91 GregoryW Modified to support code page 1252. This is for 00113 * the PDK release only. After the PDK this routine 00114 * will be modified to use the NLSAPI. Also renamed 00115 * API to conform to new naming conventions. AnsiUpper 00116 * is now a #define which resolves to this routine. 00117 \***************************************************************************/ 00118 00119 LPSTR WINAPI CharUpperA( 00120 LPSTR psz) 00121 { 00122 NTSTATUS st; 00123 00124 /* 00125 * Early out for NULL string or '\0' 00126 */ 00127 if (psz == NULL) { 00128 return psz; 00129 } 00130 00131 if (!IS_PTR(psz)) { 00132 WCHAR wch; 00133 00134 #ifdef FE_SB // CharLowerA() 00135 /* 00136 * if only DBCS Leadbyte was passed, just return the character. 00137 * Same behavior as Windows 3.1J and Windows 95 FarEast version. 00138 */ 00139 if (IS_DBCS_ENABLED() && IsDBCSLeadByte((BYTE)(ULONG_PTR)psz)) { 00140 return psz; 00141 } 00142 #endif // FE_SB 00143 00144 // 00145 // LATER 14 Feb 92 GregoryW 00146 // For DBCS code pages is a double byte character ever 00147 // passed in the low word of psz or is the high nibble 00148 // of the low word always ignored? 00149 // 00150 st = RtlMultiByteToUnicodeN(&wch, sizeof(WCHAR), NULL, (PCH)&psz, sizeof(CHAR)); 00151 if (!NT_SUCCESS(st)) { 00152 /* 00153 * Failed! Caller is not expecting failure, CharUpperA does not 00154 * have a failure indicator, so return the original character. 00155 */ 00156 RIPMSG1(RIP_WARNING, "CharUpperA(%#p) failed\n", psz); 00157 } else { 00158 /* 00159 * The next two calls never fail. 00160 */ 00161 LCMapStringW(LOCALE_USER_DEFAULT, LCMAP_UPPERCASE, &wch, 1, &wch, 1); 00162 RtlUnicodeToMultiByteN((PCH)&psz, sizeof(CHAR), NULL, &wch, sizeof(WCHAR)); 00163 } 00164 return psz; 00165 00166 } 00167 00168 /* 00169 * psz is a null-terminated string 00170 */ 00171 CharUpperBuffA(psz, strlen(psz)+1); 00172 return psz; 00173 } 00174 00175 00176 /***************************************************************************\ 00177 * CharNextA (API) 00178 * 00179 * Move to next character in string unless already at '\0' terminator 00180 * DOES NOT WORK CORRECTLY FOR DBCS (eg: Japanese) 00181 * 00182 * History: 00183 * 12-03-90 IanJa Created non-NLS version. 00184 * 06-22-91 GregoryW Renamed API to conform to new naming conventions. 00185 * AnsiNext is now a #define which resolves to this 00186 * routine. This routine is only intended to support 00187 * code page 1252 for the PDK release. 00188 \***************************************************************************/ 00189 00190 LPSTR WINAPI CharNextA( 00191 LPCSTR lpCurrentChar) 00192 { 00193 #ifdef FE_SB // CharNextA(): dbcs enabling 00194 if (IS_DBCS_ENABLED() && IsDBCSLeadByte(*lpCurrentChar)) { 00195 lpCurrentChar++; 00196 } 00197 /* 00198 * if we have only DBCS LeadingByte, we will point string-terminaler. 00199 */ 00200 #endif // FE_SB 00201 00202 if (*lpCurrentChar) { 00203 lpCurrentChar++; 00204 } 00205 return (LPSTR)lpCurrentChar; 00206 } 00207 00208 00209 /***************************************************************************\ 00210 * CharNextExA (API) 00211 * 00212 * Move to next character in string unless already at '\0' terminator. 00213 * 00214 * History: 00215 * 05-01-95 GregoryW Ported from Win95. 00216 \***************************************************************************/ 00217 00218 LPSTR WINAPI CharNextExA( 00219 WORD CodePage, 00220 LPCSTR lpCurrentChar, 00221 DWORD dwFlags) 00222 { 00223 if (lpCurrentChar == (LPSTR)NULL) 00224 { 00225 return (LPSTR)lpCurrentChar; 00226 } 00227 00228 if (IsDBCSLeadByteEx(CodePage, *lpCurrentChar)) 00229 { 00230 lpCurrentChar++; 00231 } 00232 00233 if (*lpCurrentChar) 00234 { 00235 lpCurrentChar++; 00236 } 00237 return (LPSTR)lpCurrentChar; 00238 00239 UNREFERENCED_PARAMETER(dwFlags); 00240 } 00241 00242 00243 /***************************************************************************\ 00244 * CharPrevA (API) 00245 * 00246 * Move to previous character in string, unless already at start 00247 * DOES NOT WORK CORRECTLY FOR DBCS (eg: Japanese) 00248 * 00249 * History: 00250 * 12-03-90 IanJa Created non-NLS version. 00251 * 06-22-91 GregoryW Renamed API to conform to new naming conventions. 00252 * AnsiPrev is now a #define which resolves to this 00253 * routine. This routine is only intended to support 00254 * code page 1252 for the PDK release. 00255 \***************************************************************************/ 00256 00257 LPSTR WINAPI CharPrevA( 00258 LPCSTR lpStart, 00259 LPCSTR lpCurrentChar) 00260 { 00261 #ifdef FE_SB // CharPrevA : dbcs enabling 00262 if (lpCurrentChar > lpStart) { 00263 if (IS_DBCS_ENABLED()) { 00264 LPCSTR lpChar; 00265 BOOL bDBC = FALSE; 00266 00267 for (lpChar = --lpCurrentChar - 1 ; lpChar >= lpStart ; lpChar--) { 00268 if (!IsDBCSLeadByte(*lpChar)) 00269 break; 00270 bDBC = !bDBC; 00271 } 00272 00273 if (bDBC) 00274 lpCurrentChar--; 00275 } 00276 else 00277 lpCurrentChar--; 00278 } 00279 return (LPSTR)lpCurrentChar; 00280 #else 00281 if (lpCurrentChar > lpStart) { 00282 lpCurrentChar--; 00283 } 00284 return (LPSTR)lpCurrentChar; 00285 #endif // FE_SB 00286 } 00287 00288 /***************************************************************************\ 00289 * CharPrevExA (API) 00290 * 00291 * Move to previous character in string, unless already at start. 00292 * 00293 * History: 00294 * 05-01-95 GregoryW Ported from Win95. 00295 \***************************************************************************/ 00296 00297 LPSTR WINAPI CharPrevExA( 00298 WORD CodePage, 00299 LPCSTR lpStart, 00300 LPCSTR lpCurrentChar, 00301 DWORD dwFlags) 00302 { 00303 if (lpCurrentChar > lpStart) { 00304 LPCSTR lpChar; 00305 BOOL bDBC = FALSE; 00306 00307 for (lpChar = --lpCurrentChar - 1 ; lpChar >= lpStart ; lpChar--) { 00308 if (!IsDBCSLeadByteEx(CodePage, *lpChar)) 00309 break; 00310 bDBC = !bDBC; 00311 } 00312 00313 if (bDBC) 00314 lpCurrentChar--; 00315 } 00316 return (LPSTR)lpCurrentChar; 00317 00318 UNREFERENCED_PARAMETER(dwFlags); 00319 } 00320 00321 00322 /***************************************************************************\ 00323 * CharLowerBuffA (API) 00324 * 00325 * History: 00326 * 14-Jan-1991 mikeke from win 3.0 00327 * 06-22-91 GregoryW Renamed API to conform to new naming conventions. 00328 * AnsiLowerBuff is now a #define which resolves to this 00329 * routine. This routine is only intended to support 00330 * code page 1252 for the PDK release. 00331 * 02-20-1992 GregoryW Modified to use NLS API. 00332 \***************************************************************************/ 00333 #define CCH_LOCAL_BUFF 256 00334 00335 DWORD WINAPI CharLowerBuffA( 00336 LPSTR psz, 00337 DWORD nLength) 00338 { 00339 ULONG cb; 00340 WCHAR awchLocal[CCH_LOCAL_BUFF]; 00341 LPWSTR pwszT = awchLocal; 00342 int cwch; 00343 00344 if (nLength == 0) { 00345 return(0); 00346 } 00347 00348 /* 00349 * Convert ANSI to Unicode. 00350 * Use awchLocal if it is big enough, otherwise allocate space. 00351 */ 00352 cwch = MBToWCS( 00353 psz, // ANSI buffer 00354 nLength, // length of buffer 00355 &pwszT, // address of Unicode string 00356 (nLength > CCH_LOCAL_BUFF ? -1 : nLength), 00357 (nLength > CCH_LOCAL_BUFF) ); 00358 00359 if (cwch != 0) { 00360 CharLowerBuffW(pwszT, cwch); 00361 00362 /* 00363 * This can't fail 00364 */ 00365 RtlUnicodeToMultiByteN( 00366 psz, // ANSI string 00367 nLength, // given to us 00368 &cb, // result length 00369 pwszT, // Unicode string 00370 cwch * sizeof(WCHAR)); // length IN BYTES 00371 00372 if (pwszT != awchLocal) { 00373 UserLocalFree(pwszT); 00374 } 00375 00376 return (DWORD)cb; 00377 } 00378 00379 /* 00380 * MBToWCS failed! The caller is not expecting failure, 00381 * so we convert the string to lower case as best we can. 00382 */ 00383 RIPMSG2(RIP_WARNING, 00384 "CharLowerBuffA(%#p, %lx) failed\n", psz, nLength); 00385 00386 for (cb=0; cb < nLength; cb++) { 00387 #ifdef FE_SB // CharLowerBuffA(): skip double byte character 00388 if (IS_DBCS_ENABLED() && IsDBCSLeadByte(psz[cb])) { 00389 cb++; 00390 } else if (IsCharUpperA(psz[cb])) { 00391 psz[cb] += 'a'-'A'; 00392 } 00393 #else 00394 if (IsCharUpperA(psz[cb])) { 00395 psz[cb] += 'a'-'A'; 00396 } 00397 #endif // FE_SB 00398 } 00399 00400 return nLength; 00401 } 00402 00403 00404 /***************************************************************************\ 00405 * CharUpperBuffA (API) 00406 * 00407 * History: 00408 * 14-Jan-1991 mikeke from win 3.0 00409 * 06-22-91 GregoryW Renamed API to conform to new naming conventions. 00410 * AnsiUpperBuff is now a #define which resolves to this 00411 * routine. This routine is only intended to support 00412 * code page 1252 for the PDK release. 00413 * 02-Feb-1992 GregoryW Modified to use NLS API. 00414 \***************************************************************************/ 00415 00416 DWORD WINAPI CharUpperBuffA( 00417 LPSTR psz, 00418 DWORD nLength) 00419 { 00420 DWORD cb; 00421 WCHAR awchLocal[CCH_LOCAL_BUFF]; 00422 LPWSTR pwszT = awchLocal; 00423 int cwch; 00424 00425 if (nLength==0) { 00426 return(0); 00427 } 00428 00429 /* 00430 * Convert ANSI to Unicode. 00431 * Use awchLocal if it is big enough, otherwise allocate space. 00432 */ 00433 cwch = MBToWCS( 00434 psz, // ANSI buffer 00435 nLength, // length of buffer 00436 &pwszT, // address of Unicode string 00437 (nLength > CCH_LOCAL_BUFF ? -1 : nLength), 00438 (nLength > CCH_LOCAL_BUFF) ); 00439 00440 if (cwch != 0) { 00441 CharUpperBuffW(pwszT, cwch); 00442 00443 RtlUnicodeToMultiByteN( 00444 psz, // address of ANSI string 00445 nLength, // given to us 00446 &cb, // result length 00447 pwszT, // Unicode string 00448 cwch * sizeof(WCHAR)); // length IN BYTES 00449 00450 if (pwszT != awchLocal) { 00451 UserLocalFree(pwszT); 00452 } 00453 00454 return (DWORD)cb; 00455 } 00456 00457 /* 00458 * MBToWCS failed! The caller is not expecting failure, 00459 * so we convert the string to upper case as best we can. 00460 */ 00461 RIPMSG2(RIP_WARNING, 00462 "CharLowerBuffA(%#p, %lx) failed\n", psz, nLength); 00463 00464 for (cb=0; cb < nLength; cb++) { 00465 #ifdef FE_SB // CharUpperBuffA(): skip double byte characters 00466 if (IS_DBCS_ENABLED() && IsDBCSLeadByte(psz[cb])) { 00467 cb++; 00468 } else if (IsCharLowerA(psz[cb]) && 00469 /* 00470 * Sometime, LATIN_xxxx code is DBCS LeadingByte depending on ACP. 00471 * In that case, we never come here... 00472 */ 00473 (psz[cb] != LATIN_SMALL_LETTER_SHARP_S) && 00474 (psz[cb] != LATIN_SMALL_LETTER_Y_DIAERESIS)) { 00475 psz[cb] += 'A'-'a'; 00476 } 00477 #else 00478 if (IsCharLowerA(psz[cb]) && 00479 (psz[cb] != LATIN_SMALL_LETTER_SHARP_S) && 00480 (psz[cb] != LATIN_SMALL_LETTER_Y_DIAERESIS)) { 00481 psz[cb] += 'A'-'a'; 00482 } 00483 #endif // FE_SB 00484 } 00485 00486 return nLength; 00487 } 00488 00489 00490 /***************************************************************************\ 00491 * IsCharLowerA (API) 00492 * 00493 * History: 00494 * 14-Jan-1991 mikeke from win 3.0 00495 * 22-Jun-1991 GregoryW Modified to support code page 1252 (Windows ANSI 00496 * code page). This is for the PDK only. After the 00497 * PDK this routine will be rewritten to use the 00498 * NLSAPI. 00499 * 02-Feb-1992 GregoryW Modified to use NLS API. 00500 \***************************************************************************/ 00501 00502 BOOL WINAPI IsCharLowerA( 00503 char cChar) 00504 { 00505 WORD ctype1info = 0; 00506 WCHAR wChar = 0; 00507 00508 #ifdef FE_SB // IsCharLowerA() 00509 /* 00510 * if only DBCS Leadbyte was passed, just return FALSE. 00511 * Same behavior as Windows 3.1J and Windows 95 FarEast version. 00512 */ 00513 if (IS_DBCS_ENABLED() && IsDBCSLeadByte(cChar)) { 00514 return FALSE; 00515 } 00516 #endif // FE_SB 00517 00518 /* 00519 * The following 2 calls cannot fail here 00520 */ 00521 RtlMultiByteToUnicodeN(&wChar, sizeof(WCHAR), NULL, &cChar, sizeof(CHAR)); 00522 GetStringTypeW(CT_CTYPE1, &wChar, 1, &ctype1info); 00523 return (ctype1info & C1_LOWER) == C1_LOWER; 00524 } 00525 00526 00527 /***************************************************************************\ 00528 * IsCharUpperA (API) 00529 * 00530 * History: 00531 * 22-Jun-1991 GregoryW Created to support code page 1252 (Windows ANSI 00532 * code page). This is for the PDK only. After the 00533 * PDK this routine will be rewritten to use the 00534 * NLSAPI. 00535 * 02-Feb-1992 GregoryW Modified to use NLS API. 00536 \***************************************************************************/ 00537 00538 BOOL WINAPI IsCharUpperA( 00539 char cChar) 00540 { 00541 WORD ctype1info = 0; 00542 WCHAR wChar = 0; 00543 00544 #ifdef FE_SB // IsCharUpperA() 00545 /* 00546 * if only DBCS Leadbyte was passed, just return FALSE. 00547 * Same behavior as Windows 3.1J and Windows 95 FarEast version. 00548 */ 00549 if (IS_DBCS_ENABLED() && IsDBCSLeadByte(cChar)) { 00550 return FALSE; 00551 } 00552 #endif // FE_SB 00553 00554 /* 00555 * The following 2 calls cannot fail here 00556 */ 00557 RtlMultiByteToUnicodeN(&wChar, sizeof(WCHAR), NULL, &cChar, sizeof(CHAR)); 00558 GetStringTypeW(CT_CTYPE1, &wChar, 1, &ctype1info); 00559 return (ctype1info & C1_UPPER) == C1_UPPER; 00560 } 00561 00562 00563 /***************************************************************************\ 00564 * IsCharAlphaNumericA (API) 00565 * 00566 * Returns TRUE if character is alphabetical or numerical, otherwise FALSE 00567 * 00568 * History: 00569 * 12-03-90 IanJa Created non-NLS stub version. 00570 * 06-22-91 GregoryW Modified to support code page 1252 (Windows ANSI 00571 * code page). This is for the PDK only. After the 00572 * PDK this routine will be rewritten to use the 00573 * NLSAPI. 00574 * 02-20-92 GregoryW Modified to use the NLS API. 00575 \***************************************************************************/ 00576 00577 BOOL WINAPI IsCharAlphaNumericA( 00578 char cChar) 00579 { 00580 WORD ctype1info = 0; 00581 WCHAR wChar = 0; 00582 00583 /* 00584 * The following 2 calls cannot fail here 00585 */ 00586 RtlMultiByteToUnicodeN(&wChar, sizeof(WCHAR), NULL, &cChar, sizeof(CHAR)); 00587 GetStringTypeW(CT_CTYPE1, &wChar, 1, &ctype1info); 00588 #ifdef FE_SB // IsCharAlphaNumericA() 00589 if (ctype1info & C1_ALPHA) { 00590 WORD ctype3info = 0; 00591 if (!IS_DBCS_ENABLED()) { 00592 return TRUE; 00593 } 00594 /* 00595 * We don't want to return TRUE for halfwidth katakana. 00596 * Katakana is linguistic character (C1_ALPHA), but it is not 00597 * alphabet character. 00598 */ 00599 GetStringTypeW(CT_CTYPE3, &wChar, 1, &ctype3info); 00600 return ((ctype3info & (C3_KATAKANA|C3_HIRAGANA)) ? FALSE : TRUE); 00601 } 00602 /* Otherwise, it might be digits ? */ 00603 return !!(ctype1info & C1_DIGIT); 00604 #else 00605 return (ctype1info & C1_ALPHA) || (ctype1info & C1_DIGIT); 00606 #endif // FE_SB 00607 } 00608 00609 00610 /***************************************************************************\ 00611 * IsCharAlphaA (API) 00612 * 00613 * Returns TRUE if character is alphabetical, otherwise FALSE 00614 * 00615 * History: 00616 * 06-22-91 GregoryW Created to support code page 1252 (Windows ANSI 00617 * code page). This is for the PDK only. After the 00618 * PDK this routine will be rewritten to use the 00619 * NLSAPI. 00620 * 02-20-92 GregoryW Modified to use the NLS API. 00621 \***************************************************************************/ 00622 00623 BOOL WINAPI IsCharAlphaA( 00624 char cChar) 00625 { 00626 WORD ctype1info = 0; 00627 WCHAR wChar = 0; 00628 00629 /* 00630 * The following 2 calls cannot fail here 00631 */ 00632 RtlMultiByteToUnicodeN(&wChar, sizeof(WCHAR), NULL, &cChar, sizeof(CHAR)); 00633 GetStringTypeW(CT_CTYPE1, &wChar, 1, &ctype1info); 00634 #ifdef FE_SB // IsCharAlphaA() 00635 if ((ctype1info & C1_ALPHA) == C1_ALPHA) { 00636 WORD ctype3info = 0; 00637 if (!IS_DBCS_ENABLED()) { 00638 return TRUE; 00639 } 00640 /* 00641 * We don't want to return TRUE for halfwidth katakana. 00642 * Katakana is linguistic character (C1_ALPHA), but it is not 00643 * alphabet character. 00644 */ 00645 GetStringTypeW(CT_CTYPE3, &wChar, 1, &ctype3info); 00646 return ((ctype3info & (C3_KATAKANA|C3_HIRAGANA)) ? FALSE : TRUE); 00647 } 00648 return (FALSE); 00649 #else 00650 return (ctype1info & C1_ALPHA) == C1_ALPHA; 00651 #endif // FE_SB 00652 } 00653