ICU 52.1  52.1
normalizer2.h
Go to the documentation of this file.
1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 2009-2013, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 * file name: normalizer2.h
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2009nov22
14 * created by: Markus W. Scherer
15 */
16 
17 #ifndef __NORMALIZER2_H__
18 #define __NORMALIZER2_H__
19 
25 #include "unicode/utypes.h"
26 
27 #if !UCONFIG_NO_NORMALIZATION
28 
29 #include "unicode/uniset.h"
30 #include "unicode/unistr.h"
31 #include "unicode/unorm2.h"
32 
34 
79 public:
/** Destructor declaration for Normalizer2 (defined elsewhere, not in this header listing). */
84  ~Normalizer2();
85 
/**
 * Static accessor returning a pointer to a const Normalizer2.
 * Presumably the Unicode NFC normalizer (inferred from the name -- confirm
 * against the ICU API reference).
 * @param errorCode ICU error code, passed by non-const reference
 * @return pointer to a const Normalizer2; NOTE(review): ownership and the
 *         value returned on failure are not visible in this listing
 */
97  static const Normalizer2 *
98  getNFCInstance(UErrorCode &errorCode);
99 
/**
 * Static accessor returning a pointer to a const Normalizer2.
 * Presumably the Unicode NFD normalizer (inferred from the name -- confirm
 * against the ICU API reference).
 * @param errorCode ICU error code, passed by non-const reference
 * @return pointer to a const Normalizer2; NOTE(review): ownership and the
 *         value returned on failure are not visible in this listing
 */
111  static const Normalizer2 *
112  getNFDInstance(UErrorCode &errorCode);
113 
/**
 * Static accessor returning a pointer to a const Normalizer2.
 * Presumably the Unicode NFKC normalizer (inferred from the name -- confirm
 * against the ICU API reference).
 * @param errorCode ICU error code, passed by non-const reference
 * @return pointer to a const Normalizer2; NOTE(review): ownership and the
 *         value returned on failure are not visible in this listing
 */
125  static const Normalizer2 *
126  getNFKCInstance(UErrorCode &errorCode);
127 
/**
 * Static accessor returning a pointer to a const Normalizer2.
 * Presumably the Unicode NFKD normalizer (inferred from the name -- confirm
 * against the ICU API reference).
 * @param errorCode ICU error code, passed by non-const reference
 * @return pointer to a const Normalizer2; NOTE(review): ownership and the
 *         value returned on failure are not visible in this listing
 */
139  static const Normalizer2 *
140  getNFKDInstance(UErrorCode &errorCode);
141 
/**
 * Static accessor returning a pointer to a const Normalizer2.
 * Presumably the NFKC_Casefold normalizer (inferred from the name -- confirm
 * against the ICU API reference).
 * @param errorCode ICU error code, passed by non-const reference
 * @return pointer to a const Normalizer2; NOTE(review): ownership and the
 *         value returned on failure are not visible in this listing
 */
153  static const Normalizer2 *
154  getNFKCCasefoldInstance(UErrorCode &errorCode);
155 
/**
 * Static accessor returning a pointer to a const Normalizer2 selected by the
 * given arguments.
 * NOTE(review): how packageName, name and mode select the instance (and
 * whether packageName may be NULL) is not visible in this listing -- confirm
 * against the ICU API reference.
 * @param packageName C string; presumably identifies the data package
 * @param name        C string; presumably identifies the normalization data
 * @param mode        a UNormalization2Mode value
 * @param errorCode   ICU error code, passed by non-const reference
 * @return pointer to a const Normalizer2
 */
177  static const Normalizer2 *
178  getInstance(const char *packageName,
179  const char *name,
180  UNormalization2Mode mode,
181  UErrorCode &errorCode);
182 
/**
 * Convenience overload: creates a local UnicodeString, fills it by calling
 * the three-argument normalize(src, result, errorCode), and returns it by
 * value.
 * The return-type line (listing line 193) was missing from this listing and
 * is restored here; the body returns a local UnicodeString.
 * @param src       source string, not modified
 * @param errorCode ICU error code, forwarded unchanged to the three-argument
 *                  overload
 * @return the local result string, by value
 */
193  UnicodeString
194  normalize(const UnicodeString &src, UErrorCode &errorCode) const {
195  UnicodeString result;
196  normalize(src, result, errorCode);
197  return result;
198  }
/**
 * Pure virtual (= 0): every concrete subclass must implement it.
 * Three-argument normalize; the two-argument inline overload in this listing
 * delegates to it.
 * @param src       source string, not modified
 * @param dest      destination string, passed by non-const reference
 * @param errorCode ICU error code, passed by non-const reference
 * @return a UnicodeString reference; presumably dest -- confirm
 */
212  virtual UnicodeString &
213  normalize(const UnicodeString &src,
214  UnicodeString &dest,
215  UErrorCode &errorCode) const = 0;
/**
 * Pure virtual (= 0): every concrete subclass must implement it.
 * Presumably normalizes second and appends it to first (inferred from the
 * name -- confirm against the ICU API reference).
 * @param first     string passed by non-const reference (may be modified)
 * @param second    string passed by const reference
 * @param errorCode ICU error code, passed by non-const reference
 * @return a UnicodeString reference; presumably first -- confirm
 */
230  virtual UnicodeString &
231  normalizeSecondAndAppend(UnicodeString &first,
232  const UnicodeString &second,
233  UErrorCode &errorCode) const = 0;
/**
 * Pure virtual (= 0): every concrete subclass must implement it.
 * Presumably appends second to first; NOTE(review): any precondition on the
 * inputs (e.g. already normalized) is not visible here -- confirm.
 * @param first     string passed by non-const reference (may be modified)
 * @param second    string passed by const reference
 * @param errorCode ICU error code, passed by non-const reference
 * @return a UnicodeString reference; presumably first -- confirm
 */
248  virtual UnicodeString &
249  append(UnicodeString &first,
250  const UnicodeString &second,
251  UErrorCode &errorCode) const = 0;
252 
/**
 * Pure virtual (= 0): every concrete subclass must implement it.
 * @param c             code point
 * @param decomposition output string, passed by non-const reference
 * @return UBool; presumably true when c has a decomposition -- confirm
 */
266  virtual UBool
267  getDecomposition(UChar32 c, UnicodeString &decomposition) const = 0;
268 
/**
 * Virtual but not pure (no "= 0"), so a base-class definition is expected
 * elsewhere and subclasses may override it.
 * NOTE(review): how the "raw" decomposition differs from getDecomposition()
 * is not visible in this listing -- confirm against the ICU API reference.
 * @param c             code point
 * @param decomposition output string, passed by non-const reference
 * @return UBool; presumably true when c has a decomposition -- confirm
 */
293  virtual UBool
294  getRawDecomposition(UChar32 c, UnicodeString &decomposition) const;
295 
/**
 * Virtual but not pure (no "= 0"), so a base-class definition is expected
 * elsewhere and subclasses may override it.
 * @param a first code point
 * @param b second code point
 * @return a UChar32; presumably the composite of a and b -- NOTE(review):
 *         the value returned when the pair does not compose is not visible here
 */
311  virtual UChar32
312  composePair(UChar32 a, UChar32 b) const;
313 
/**
 * Virtual but not pure (no "= 0"), so a base-class definition is expected
 * elsewhere and subclasses may override it.
 * @param c code point
 * @return a uint8_t; presumably the combining class of c -- confirm
 */
322  virtual uint8_t
323  getCombiningClass(UChar32 c) const;
324 
/**
 * Pure virtual (= 0): every concrete subclass must implement it.
 * @param s         input string, not modified
 * @param errorCode ICU error code, passed by non-const reference
 * @return UBool; presumably true when s is already normalized -- confirm
 */
339  virtual UBool
340  isNormalized(const UnicodeString &s, UErrorCode &errorCode) const = 0;
341 
/**
 * Pure virtual (= 0): every concrete subclass must implement it.
 * The return-type line (listing line 357) was missing from this listing and
 * is restored here per the ICU API reference; a pure-specifier is only valid
 * on a virtual function.
 * @param s         input string, not modified
 * @param errorCode ICU error code, passed by non-const reference
 * @return a UNormalizationCheckResult; NOTE(review): the meaning of each
 *         enumerator is declared in unicode/unorm2.h, not in this listing
 */
357  virtual UNormalizationCheckResult
358  quickCheck(const UnicodeString &s, UErrorCode &errorCode) const = 0;
359 
/**
 * Pure virtual (= 0): every concrete subclass must implement it.
 * @param s         input string, not modified
 * @param errorCode ICU error code, passed by non-const reference
 * @return an int32_t; presumably the end index of the leading part of s that
 *         passes the quick check -- confirm against the ICU API reference
 */
382  virtual int32_t
383  spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const = 0;
384 
/**
 * Pure virtual (= 0): every concrete subclass must implement it.
 * @param c code point
 * @return UBool; presumably whether c has a normalization boundary before
 *         it -- confirm against the ICU API reference
 */
398  virtual UBool hasBoundaryBefore(UChar32 c) const = 0;
399 
/**
 * Pure virtual (= 0): every concrete subclass must implement it.
 * @param c code point
 * @return UBool; presumably whether c has a normalization boundary after
 *         it -- confirm against the ICU API reference
 */
414  virtual UBool hasBoundaryAfter(UChar32 c) const = 0;
415 
/**
 * Pure virtual (= 0): every concrete subclass must implement it.
 * @param c code point
 * @return UBool; NOTE(review): the exact definition of "inert" is not visible
 *         in this listing -- confirm against the ICU API reference
 */
429  virtual UBool isInert(UChar32 c) const = 0;
430 };
431 
444 public:
/**
 * Constructor: binds the members norm2 and set to the two arguments; the body
 * is empty.
 * Both members are declared as const references, so nothing is copied and the
 * referenced objects must outlive this FilteredNormalizer2.
 * @param n2        normalizer to bind as norm2
 * @param filterSet set to bind as set
 */
455  FilteredNormalizer2(const Normalizer2 &n2, const UnicodeSet &filterSet) :
456  norm2(n2), set(filterSet) {}
457 
463 
/**
 * Same signature as the three-argument normalize() declared for Normalizer2
 * earlier in this listing, without "= 0"; presumably its override (the class
 * header is not visible here -- confirm).
 * @param src       source string, not modified
 * @param dest      destination string, passed by non-const reference
 * @param errorCode ICU error code, passed by non-const reference
 * @return a UnicodeString reference; presumably dest -- confirm
 */
477  virtual UnicodeString &
478  normalize(const UnicodeString &src,
479  UnicodeString &dest,
480  UErrorCode &errorCode) const;
/**
 * Same signature as normalizeSecondAndAppend() declared for Normalizer2
 * earlier in this listing, without "= 0"; presumably its override (the class
 * header is not visible here -- confirm).
 * The function-name line (listing line 496) was missing from this listing and
 * is restored here to match that earlier declaration.
 * @param first     string passed by non-const reference (may be modified)
 * @param second    string passed by const reference
 * @param errorCode ICU error code, passed by non-const reference
 * @return a UnicodeString reference; presumably first -- confirm
 */
495  virtual UnicodeString &
496  normalizeSecondAndAppend(UnicodeString &first,
497  const UnicodeString &second,
498  UErrorCode &errorCode) const;
/**
 * Same signature as append() declared for Normalizer2 earlier in this
 * listing, without "= 0"; presumably its override (the class header is not
 * visible here -- confirm).
 * @param first     string passed by non-const reference (may be modified)
 * @param second    string passed by const reference
 * @param errorCode ICU error code, passed by non-const reference
 * @return a UnicodeString reference; presumably first -- confirm
 */
513  virtual UnicodeString &
514  append(UnicodeString &first,
515  const UnicodeString &second,
516  UErrorCode &errorCode) const;
517 
/**
 * Same signature as getDecomposition() declared for Normalizer2 earlier in
 * this listing, without "= 0"; presumably its override -- confirm.
 * @param c             code point
 * @param decomposition output string, passed by non-const reference
 * @return UBool; presumably true when c has a decomposition -- confirm
 */
529  virtual UBool
530  getDecomposition(UChar32 c, UnicodeString &decomposition) const;
531 
/**
 * Same signature as getRawDecomposition() declared for Normalizer2 earlier in
 * this listing; presumably its override -- confirm.
 * @param c             code point
 * @param decomposition output string, passed by non-const reference
 * @return UBool; presumably true when c has a decomposition -- confirm
 */
543  virtual UBool
544  getRawDecomposition(UChar32 c, UnicodeString &decomposition) const;
545 
/**
 * Same signature as composePair() declared for Normalizer2 earlier in this
 * listing; presumably its override -- confirm.
 * @param a first code point
 * @param b second code point
 * @return a UChar32; presumably the composite of a and b -- confirm
 */
556  virtual UChar32
557  composePair(UChar32 a, UChar32 b) const;
558 
/**
 * Same signature as getCombiningClass() declared for Normalizer2 earlier in
 * this listing; presumably its override -- confirm.
 * @param c code point
 * @return a uint8_t; presumably the combining class of c -- confirm
 */
567  virtual uint8_t
568  getCombiningClass(UChar32 c) const;
569 
/**
 * Same signature as isNormalized() declared for Normalizer2 earlier in this
 * listing, without "= 0"; presumably its override -- confirm.
 * @param s         input string, not modified
 * @param errorCode ICU error code, passed by non-const reference
 * @return UBool; presumably true when s is already normalized -- confirm
 */
581  virtual UBool
582  isNormalized(const UnicodeString &s, UErrorCode &errorCode) const;
/**
 * Same parameters as quickCheck() declared for Normalizer2 earlier in this
 * listing; presumably its override -- confirm.
 * The return-type line (listing line 594) was missing from this listing and
 * is restored here per the ICU API reference.
 * @param s         input string, not modified
 * @param errorCode ICU error code, passed by non-const reference
 * @return a UNormalizationCheckResult; NOTE(review): the meaning of each
 *         enumerator is declared in unicode/unorm2.h, not in this listing
 */
594  virtual UNormalizationCheckResult
595  quickCheck(const UnicodeString &s, UErrorCode &errorCode) const;
/**
 * Same signature as spanQuickCheckYes() declared for Normalizer2 earlier in
 * this listing, without "= 0"; presumably its override -- confirm.
 * @param s         input string, not modified
 * @param errorCode ICU error code, passed by non-const reference
 * @return an int32_t; presumably an end index into s -- confirm
 */
607  virtual int32_t
608  spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const;
609 
/**
 * Same signature as hasBoundaryBefore() declared for Normalizer2 earlier in
 * this listing, without "= 0"; presumably its override -- confirm.
 * @param c code point
 * @return UBool; presumably whether c has a normalization boundary before it
 */
618  virtual UBool hasBoundaryBefore(UChar32 c) const;
619 
/**
 * Same signature as hasBoundaryAfter() declared for Normalizer2 earlier in
 * this listing, without "= 0"; presumably its override -- confirm.
 * @param c code point
 * @return UBool; presumably whether c has a normalization boundary after it
 */
628  virtual UBool hasBoundaryAfter(UChar32 c) const;
629 
/**
 * Same signature as isInert() declared for Normalizer2 earlier in this
 * listing, without "= 0"; presumably its override -- confirm.
 * @param c code point
 * @return UBool; NOTE(review): the exact definition of "inert" is not visible
 *         in this listing
 */
637  virtual UBool isInert(UChar32 c) const;
638 private:
/**
 * Private, non-virtual overload of normalize() that takes an extra
 * USetSpanCondition argument.
 * NOTE(review): presumably the internal worker behind the public normalize();
 * its definition is not in this header -- confirm in the implementation file.
 * @param src           source string, not modified
 * @param dest          destination string, passed by non-const reference
 * @param spanCondition a USetSpanCondition value
 * @param errorCode     ICU error code, passed by non-const reference
 * @return a UnicodeString reference; presumably dest -- confirm
 */
639  UnicodeString &
640  normalize(const UnicodeString &src,
641  UnicodeString &dest,
642  USetSpanCondition spanCondition,
643  UErrorCode &errorCode) const;
644 
/**
 * Private, non-virtual overload of normalizeSecondAndAppend() that takes an
 * extra doNormalize flag.
 * The function-name line (listing line 646) was missing from this listing and
 * is restored here to match the public declaration and the ICU source.
 * NOTE(review): presumably the shared worker behind the public
 * normalizeSecondAndAppend() and append(), selected by doNormalize; its
 * definition is not in this header -- confirm in the implementation file.
 * @param first       string passed by non-const reference (may be modified)
 * @param second      string passed by const reference
 * @param doNormalize UBool flag
 * @param errorCode   ICU error code, passed by non-const reference
 * @return a UnicodeString reference; presumably first -- confirm
 */
645  UnicodeString &
646  normalizeSecondAndAppend(UnicodeString &first,
647  const UnicodeString &second,
648  UBool doNormalize,
649  UErrorCode &errorCode) const;
650 
/* Both members are const references bound by the constructor's initializer
 * list; this class holds no copy, so the referenced objects must outlive it. */
651  const Normalizer2 &norm2;
652  const UnicodeSet &set;
653 };
654 
656 
657 #endif // !UCONFIG_NO_NORMALIZATION
658 #endif // __NORMALIZER2_H__