1: <?php
  2: 
  3:   4:   5:   6:   7:   8:   9: 
 10: 
 11:  12:  13:  14:  15:  16:  17: 
 18: class Swift_CharacterReader_Utf8Reader implements Swift_CharacterReader
 19: {
 20:     
 21:     private static $length_map=array(
 22: 
 23:         1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 
 24:         1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 
 25:         1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 
 26:         1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 
 27:         1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 
 28:         1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 
 29:         1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 
 30:         1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 
 31:         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
 32:         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
 33:         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
 34:         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
 35:         2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 
 36:         2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 
 37:         3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, 
 38:         4,4,4,4,4,4,4,4,5,5,5,5,6,6,0,0  
 39:     );
 40: 
 41:     private static $s_length_map=array(
 42:         "\x00"=>1, "\x01"=>1, "\x02"=>1, "\x03"=>1, "\x04"=>1, "\x05"=>1, "\x06"=>1, "\x07"=>1,
 43:         "\x08"=>1, "\x09"=>1, "\x0a"=>1, "\x0b"=>1, "\x0c"=>1, "\x0d"=>1, "\x0e"=>1, "\x0f"=>1,
 44:         "\x10"=>1, "\x11"=>1, "\x12"=>1, "\x13"=>1, "\x14"=>1, "\x15"=>1, "\x16"=>1, "\x17"=>1,
 45:         "\x18"=>1, "\x19"=>1, "\x1a"=>1, "\x1b"=>1, "\x1c"=>1, "\x1d"=>1, "\x1e"=>1, "\x1f"=>1,
 46:         "\x20"=>1, "\x21"=>1, "\x22"=>1, "\x23"=>1, "\x24"=>1, "\x25"=>1, "\x26"=>1, "\x27"=>1,
 47:         "\x28"=>1, "\x29"=>1, "\x2a"=>1, "\x2b"=>1, "\x2c"=>1, "\x2d"=>1, "\x2e"=>1, "\x2f"=>1,
 48:         "\x30"=>1, "\x31"=>1, "\x32"=>1, "\x33"=>1, "\x34"=>1, "\x35"=>1, "\x36"=>1, "\x37"=>1,
 49:         "\x38"=>1, "\x39"=>1, "\x3a"=>1, "\x3b"=>1, "\x3c"=>1, "\x3d"=>1, "\x3e"=>1, "\x3f"=>1,
 50:         "\x40"=>1, "\x41"=>1, "\x42"=>1, "\x43"=>1, "\x44"=>1, "\x45"=>1, "\x46"=>1, "\x47"=>1,
 51:         "\x48"=>1, "\x49"=>1, "\x4a"=>1, "\x4b"=>1, "\x4c"=>1, "\x4d"=>1, "\x4e"=>1, "\x4f"=>1,
 52:         "\x50"=>1, "\x51"=>1, "\x52"=>1, "\x53"=>1, "\x54"=>1, "\x55"=>1, "\x56"=>1, "\x57"=>1,
 53:         "\x58"=>1, "\x59"=>1, "\x5a"=>1, "\x5b"=>1, "\x5c"=>1, "\x5d"=>1, "\x5e"=>1, "\x5f"=>1,
 54:         "\x60"=>1, "\x61"=>1, "\x62"=>1, "\x63"=>1, "\x64"=>1, "\x65"=>1, "\x66"=>1, "\x67"=>1,
 55:         "\x68"=>1, "\x69"=>1, "\x6a"=>1, "\x6b"=>1, "\x6c"=>1, "\x6d"=>1, "\x6e"=>1, "\x6f"=>1,
 56:         "\x70"=>1, "\x71"=>1, "\x72"=>1, "\x73"=>1, "\x74"=>1, "\x75"=>1, "\x76"=>1, "\x77"=>1,
 57:         "\x78"=>1, "\x79"=>1, "\x7a"=>1, "\x7b"=>1, "\x7c"=>1, "\x7d"=>1, "\x7e"=>1, "\x7f"=>1,
 58:         "\x80"=>0, "\x81"=>0, "\x82"=>0, "\x83"=>0, "\x84"=>0, "\x85"=>0, "\x86"=>0, "\x87"=>0,
 59:         "\x88"=>0, "\x89"=>0, "\x8a"=>0, "\x8b"=>0, "\x8c"=>0, "\x8d"=>0, "\x8e"=>0, "\x8f"=>0,
 60:         "\x90"=>0, "\x91"=>0, "\x92"=>0, "\x93"=>0, "\x94"=>0, "\x95"=>0, "\x96"=>0, "\x97"=>0,
 61:         "\x98"=>0, "\x99"=>0, "\x9a"=>0, "\x9b"=>0, "\x9c"=>0, "\x9d"=>0, "\x9e"=>0, "\x9f"=>0,
 62:         "\xa0"=>0, "\xa1"=>0, "\xa2"=>0, "\xa3"=>0, "\xa4"=>0, "\xa5"=>0, "\xa6"=>0, "\xa7"=>0,
 63:         "\xa8"=>0, "\xa9"=>0, "\xaa"=>0, "\xab"=>0, "\xac"=>0, "\xad"=>0, "\xae"=>0, "\xaf"=>0,
 64:         "\xb0"=>0, "\xb1"=>0, "\xb2"=>0, "\xb3"=>0, "\xb4"=>0, "\xb5"=>0, "\xb6"=>0, "\xb7"=>0,
 65:         "\xb8"=>0, "\xb9"=>0, "\xba"=>0, "\xbb"=>0, "\xbc"=>0, "\xbd"=>0, "\xbe"=>0, "\xbf"=>0,
 66:         "\xc0"=>2, "\xc1"=>2, "\xc2"=>2, "\xc3"=>2, "\xc4"=>2, "\xc5"=>2, "\xc6"=>2, "\xc7"=>2,
 67:         "\xc8"=>2, "\xc9"=>2, "\xca"=>2, "\xcb"=>2, "\xcc"=>2, "\xcd"=>2, "\xce"=>2, "\xcf"=>2,
 68:         "\xd0"=>2, "\xd1"=>2, "\xd2"=>2, "\xd3"=>2, "\xd4"=>2, "\xd5"=>2, "\xd6"=>2, "\xd7"=>2,
 69:         "\xd8"=>2, "\xd9"=>2, "\xda"=>2, "\xdb"=>2, "\xdc"=>2, "\xdd"=>2, "\xde"=>2, "\xdf"=>2,
 70:         "\xe0"=>3, "\xe1"=>3, "\xe2"=>3, "\xe3"=>3, "\xe4"=>3, "\xe5"=>3, "\xe6"=>3, "\xe7"=>3,
 71:         "\xe8"=>3, "\xe9"=>3, "\xea"=>3, "\xeb"=>3, "\xec"=>3, "\xed"=>3, "\xee"=>3, "\xef"=>3,
 72:         "\xf0"=>4, "\xf1"=>4, "\xf2"=>4, "\xf3"=>4, "\xf4"=>4, "\xf5"=>4, "\xf6"=>4, "\xf7"=>4,
 73:         "\xf8"=>5, "\xf9"=>5, "\xfa"=>5, "\xfb"=>5, "\xfc"=>6, "\xfd"=>6, "\xfe"=>0, "\xff"=>0,
 74:      );
 75: 
 76:      77:  78:  79:  80:  81:  82:  83: 
 84:     public function getCharPositions($string, $startOffset, &$currentMap, &$ignoredChars)
 85:     {
 86:         if (!isset($currentMap['i']) || !isset($currentMap['p'])) {
 87:             $currentMap['p'] = $currentMap['i'] = array();
 88:      }
 89:         $strlen=strlen($string);
 90:         $charPos=count($currentMap['p']);
 91:         $foundChars=0;
 92:         $invalid=false;
 93:         for ($i=0; $i<$strlen; ++$i) {
 94:             $char=$string[$i];
 95:             $size=self::$s_length_map[$char];
 96:             if ($size==0) {
 97:                 
 98:                 $invalid=true;
 99:                 continue;
100:        } else {
101:          if ($invalid==true) {
102:            
103:            $currentMap['p'][$charPos+$foundChars]=$startOffset+$i;
104:            $currentMap['i'][$charPos+$foundChars]=true;
105:            ++$foundChars;
106:            $invalid=false;
107:          }
108:          if (($i+$size) > $strlen) {
109:            $ignoredChars=substr($string, $i);
110:            break;
111:          }
112:          for ($j=1; $j<$size; ++$j) {
113:                     $char=$string[$i+$j];
114:                     if ($char>"\x7F" && $char<"\xC0") {
115:                         
116:                     } else {
117:                         
118:                         $invalid=true;
119:                         continue 2;
120:                     }
121:          }
122:          
123:          $currentMap['p'][$charPos+$foundChars]=$startOffset+$i+$size;
124:          $i+=$j-1;
125:          ++$foundChars;
126:        }
127:         }
128: 
129:         return $foundChars;
130:     }
131: 
132:     133: 134: 135: 
136:     public function getMapType()
137:     {
138:         return self::MAP_TYPE_POSITIONS;
139:     }
140: 
141:     142: 143: 144: 145: 146: 147: 148: 149: 
150:     public function validateByteSequence($bytes, $size)
151:     {
152:         if ($size<1) {
153:             return -1;
154:         }
155:         $needed = self::$length_map[$bytes[0]] - $size;
156: 
157:         return ($needed > -1)
158:             ? $needed
159:             : -1
160:             ;
161:     }
162: 
163:     164: 165: 166: 
167:     public function getInitialByteSize()
168:     {
169:         return 1;
170:     }
171: }
172: