001    package org.rakeshv.tex;
002    
003    import java.util.*;
004    
/**
 *       <p>Utility class that maps non-ASCII ISO 8859-1 characters
 *       (character codes above 127) to plain ASCII equivalents.</p>
 *
 *       <p>Two lookup tables are maintained:</p>
 *       <ul>
 *               <li>a <em>relaxed</em> table, in which symbols without a natural
 *               ASCII form are replaced by descriptive placeholders such as
 *               <code>&lt;copyright&gt;</code>, and quotation marks are
 *               rendered TeX style (<code>``</code> and <code>''</code>);</li>
 *               <li>a <em>strict</em> table, which leaves most of those symbols
 *               out (so that strict conversion drops them) and renders
 *               quotation marks as <code>'</code> and <code>"</code>.</li>
 *       </ul>
 *
 *       <p>NOTE(review): the codes in the range 130-159 appear to follow the
 *       Windows-1252 layout (curly quotes, dashes, ellipsis, ...); in
 *       ISO 8859-1 proper those code points are control characters.  They
 *       are matched here by numeric <code>char</code> value only.</p>
 *
 *       <p>See <a href='http://www.w3.org/TR/html4/sgml/entities.html'>ISO Character Entities</a>
 *       for a full description of the character entities.</p>
 *
 *       <p>The lookup tables are built once in the static initialiser and
 *       are never modified afterwards.</p>
 *
 *       <p>Copyright &copy; 2002 Rakesh Vidyadharan</p>
 *
 *       @author Rakesh Vidyadharan on 13 January, 2002
 */
public final class ISOToASCII
{
        /**
         *       Flag value for {@link #getASCIIEquivalent(char, boolean)}:
         *       if no mapping exists for the specified character, return the
         *       input character as its <code>String</code> equivalent instead
         *       of <code>null</code>.
         */
        public static final boolean DO_NULL_CHECK = true;

        /**
         *       Flag value for {@link #convertString(String, boolean)} that
         *       indicates that a strict ASCII conversion is desired.
         */
        public static final boolean STRICT_ASCII = true;

        /**
         *       Highest character code that is passed through unchanged by the
         *       <code>convertString</code> methods.
         */
        private static final int MAX_ASCII = 127;

        /**
         *       Unmodifiable table that holds the relaxed character mappings,
         *       keyed by character code.
         */
        private static final Map<Integer, String> RELAXED_MAP;

        /**
         *       Unmodifiable table that holds the strict character mappings,
         *       keyed by character code.
         */
        private static final Map<Integer, String> STRICT_MAP;

        /**
         *       Static initialiser that populates both lookup tables.  Entries
         *       that are identical in both tables are added by
         *       {@link #putShared(Map)}; only the differences are listed here.
         */
        static
        {
                Map<Integer, String> relaxed = new HashMap<Integer, String>();
                putShared( relaxed );

                // TeX style quotation marks.
                relaxed.put( 132, "''" );
                relaxed.put( 145, "`" );
                relaxed.put( 147, "``" );
                relaxed.put( 148, "''" );

                // Descriptive placeholders; none of these exist in the strict table.
                relaxed.put( 134, "<dagger>" );
                relaxed.put( 135, "<doubledagger>" );
                relaxed.put( 149, "<bullet>" );
                relaxed.put( 153, "<TM>" );
                relaxed.put( 162, "<cents>" );
                relaxed.put( 164, "<currency>" );
                relaxed.put( 165, "<yuan>" );
                relaxed.put( 166, "<broken vertical bar>" );
                relaxed.put( 167, "<section sign>" );
                relaxed.put( 169, "<copyright>" );
                relaxed.put( 170, "<feminine ordinal indicator>" );
                relaxed.put( 172, "<not sign>" );
                relaxed.put( 174, "<registered>" );
                relaxed.put( 176, "<degree>" );
                relaxed.put( 178, "<superscript 2>" );
                relaxed.put( 179, "<superscript 3>" );
                relaxed.put( 181, "<mu>" );
                relaxed.put( 182, "<paragraph>" );
                relaxed.put( 183, "<middle dot>" );
                relaxed.put( 184, "<spacing cedilla>" );
                relaxed.put( 185, "<superscript 1>" );
                relaxed.put( 186, "<masculine ordinal indicator>" );

                Map<Integer, String> strict = new HashMap<Integer, String>();
                putShared( strict );

                // Plain typewriter quotation marks.
                strict.put( 132, "\"" );
                strict.put( 145, "'" );
                strict.put( 147, "\"" );
                strict.put( 148, "\"" );

                RELAXED_MAP = Collections.unmodifiableMap( relaxed );
                STRICT_MAP = Collections.unmodifiableMap( strict );
        }

        /**
         *       Private constructor.  This class cannot be instantiated.
         */
        private ISOToASCII() {}

        /**
         *       Adds the mappings that are identical in the relaxed and the
         *       strict table.
         *
         *       @param table - The table that is to be populated.
         */
        private static void putShared( Map<Integer, String> table )
        {
                // Punctuation and typographic symbols.
                table.put( 130, "'" );
                table.put( 133, "..." );
                table.put( 136, "^" );
                table.put( 139, "<" );
                table.put( 140, "OE" );
                table.put( 146, "'" );
                table.put( 150, "--" );
                table.put( 151, "---" );
                table.put( 152, "~" );
                table.put( 155, ">" );
                table.put( 156, "oe" );
                table.put( 159, "Y" );
                table.put( 160, " " );
                table.put( 161, "!" );
                table.put( 163, "<pounds>" );
                table.put( 168, " " );
                table.put( 171, "<<" );
                table.put( 173, "-" );
                table.put( 175, "-" );
                table.put( 177, "+-" );
                table.put( 180, "'" );
                table.put( 187, ">>" );
                table.put( 188, "1/4" );
                table.put( 189, "1/2" );
                table.put( 190, "3/4" );
                table.put( 191, "?" );

                // Upper case letters.  Code 208 is intentionally left unmapped,
                // exactly as in the original tables.
                putRange( table, 192, 197, "A" );
                table.put( 198, "AE" );
                table.put( 199, "C" );
                putRange( table, 200, 203, "E" );
                putRange( table, 204, 207, "I" );
                table.put( 209, "N" );
                putRange( table, 210, 214, "O" );
                table.put( 215, " x " );
                table.put( 216, "O" );
                putRange( table, 217, 220, "U" );
                table.put( 221, "Y" );

                // Lower case letters.  Codes 222, 240 and 254 are intentionally
                // left unmapped, exactly as in the original tables.
                table.put( 223, "s" );
                putRange( table, 224, 229, "a" );
                table.put( 230, "ae" );
                table.put( 231, "c" );
                putRange( table, 232, 235, "e" );
                putRange( table, 236, 239, "i" );
                table.put( 241, "n" );
                putRange( table, 242, 246, "o" );
                table.put( 247, " / " );
                table.put( 248, "o" );
                putRange( table, 249, 252, "u" );
                table.put( 253, "y" );
                table.put( 255, "y" );
        }

        /**
         *       Maps every character code in the inclusive range
         *       <code>first</code> to <code>last</code> to the same value.
         *
         *       @param table - The table that is to be populated.
         *       @param first - The first character code of the range.
         *       @param last - The last character code of the range.
         *       @param value - The ASCII equivalent for the whole range.
         */
        private static void putRange( Map<Integer, String> table, int first,
                int last, String value )
        {
                for ( int code = first; code <= last; ++code )
                {
                        table.put( code, value );
                }
        }

        /**
         *       Shared implementation of the <code>convertString</code>
         *       methods.  Characters up to code 127 are copied unchanged;
         *       all other characters are looked up in <code>table</code>.
         *
         *       @param inString - The String object that is to be converted.
         *       @param table - The lookup table to use.
         *       @param keepUnmapped - If <code>true</code>, characters above
         *               code 127 that have no entry in <code>table</code> are
         *               copied unchanged; otherwise they are dropped.
         *       @return String - The converted String object.
         *       @throws NullPointerException If <code>inString</code> is
         *               <code>null</code>.
         */
        private static String convert( String inString,
                Map<Integer, String> table, boolean keepUnmapped )
        {
                if ( inString == null )
                {
                        throw new NullPointerException( "inString" );
                }

                // Method-local buffer, so the synchronised StringBuffer is not needed.
                StringBuilder out = new StringBuilder( inString.length() );

                for ( int i = 0; i < inString.length(); ++i )
                {
                        char current = inString.charAt( i );
                        if ( current <= MAX_ASCII )
                        {
                                out.append( current );
                                continue;
                        }

                        String equivalent = table.get( Integer.valueOf( current ) );
                        if ( equivalent != null )
                        {
                                out.append( equivalent );
                        }
                        else if ( keepUnmapped )
                        {
                                out.append( current );
                        }
                }

                return out.toString();
        }

        /**
         *       <p>Returns the ASCII
         *       equivalent for the specified <code>char</code>, using the
         *       relaxed table.</p>
         *
         *       <p>If the input character equivalent is not found, this
         *       method returns <code>null</code>.  You will need to check
         *       the return value in your code.  Note that plain ASCII
         *       characters have no entry in the table either, so
         *       <code>null</code> is returned for them as well.</p>
         *
         *       @param input - The character that is to be transformed.
         *       @return String - The ASCII equivalent, or <code>null</code>.
         */
        public static String getASCIIEquivalent( char input )
        {
                return RELAXED_MAP.get( Integer.valueOf( input ) );
        }

        /**
         *       <p>Returns the ASCII
         *       equivalent for the specified <code>char</code>, using the
         *       relaxed table.</p>
         *
         *       <p>If the input character equivalent is not found, and
         *       checkFlag is specified as {@link #DO_NULL_CHECK DO_NULL_CHECK},
         *       then, a <code>String</code> representation of the input
         *       character is returned.  Otherwise this method behaves like
         *       {@link #getASCIIEquivalent(char)} and may return
         *       <code>null</code>.</p>
         *
         *       @param input - The character that is to be transformed.
         *       @param checkFlag - Specify that checking for existence
         *               of ASCII equivalent should be done in the method or not.  To
         *               enable checking, specify {@link #DO_NULL_CHECK DO_NULL_CHECK}.
         *       @return String - The ASCII equivalent.
         */
        public static String getASCIIEquivalent( char input,
                boolean checkFlag )
        {
                String result = getASCIIEquivalent( input );
                if ( result == null && checkFlag == DO_NULL_CHECK )
                {
                        return String.valueOf( input );
                }

                return result;
        }

        /**
         *       <p>Convert all ISO 8859-1 encoded non-ASCII characters to
         *       appropriate ASCII characters (or strings), and return the
         *       modified <code>String</code> object.</p>
         *
         *       <p>Characters above code 127 for which no mapping exists are
         *       copied to the result unchanged, so the result is not
         *       guaranteed to be pure ASCII.</p>
         *
         *       @param inString - The String object that is to be modified.
         *       @return String - The modified String object.
         *       @throws NullPointerException If <code>inString</code> is
         *               <code>null</code>.
         */
        public static String convertString( String inString )
        {
                return convert( inString, RELAXED_MAP, true );
        }

        /**
         *       Convert all ISO 8859-1 encoded non-ASCII characters to
         *       appropriate ASCII characters (or strings), and return the
         *       modified <code>String</code> object.  When <code>strict</code>
         *       conversion is specified special characters that do not have
         *       direct ASCII equivalents, will be suppressed.
         *
         *       @param inString - The String object that is to be modified.
         *       @param strict - Specified whether strict conversion is to be
         *               performed.  Use {@link #STRICT_ASCII STRICT_ASCII} to
         *               specify preference.
         *       @return String - The modified String object.
         *       @throws NullPointerException If <code>inString</code> is
         *               <code>null</code>.
         */
        public static String convertString( String inString, boolean strict )
        {
                if ( strict == STRICT_ASCII )
                {
                        // Strict mode drops every non-ASCII character without a mapping.
                        return convert( inString, STRICT_MAP, false );
                }

                return convertString( inString );
        }
}