001 package org.rakeshv.tex;
002
003 import java.util.*;
004
/**
 * <p>Maps non-ASCII characters in the range 128-255 to plain ASCII
 * replacement strings.</p>
 *
 * <p>Two lookup tables are maintained:</p>
 * <ul>
 *   <li>a <em>descriptive</em> table, which spells out symbols that have no
 *       ASCII look-alike (for example {@code <copyright>}) and uses
 *       TeX-style quotes ({@code ``} and {@code ''});</li>
 *   <li>a <em>strict</em> table, which contains only characters with a
 *       short ASCII stand-in and uses plain {@code '} and {@code "}
 *       quotes.</li>
 * </ul>
 *
 * <p>Keys in the range 128-159 carry punctuation such as daggers, curly
 * quotes and the trademark sign; those values match the Windows-1252
 * layout rather than the ISO 8859-1 control characters.</p>
 *
 * <p>Both tables are built once in the static initialiser and are never
 * modified afterwards.</p>
 *
 * <p>See <a href='http://www.w3.org/TR/html4/sgml/entities.html'>ISO Character Entities</a>
 * for a full description of the character entities.</p>
 *
 * <p>Copyright &copy; 2002 Rakesh Vidyadharan</p>
 *
 * @author Rakesh Vidyadharan on 13 January, 2002
 */
public final class ISOToASCII
{
  /**
   * Flag value for {@link #getASCIIEquivalent(char, boolean)}: when no
   * mapping exists for the character, return the character itself as a
   * <code>String</code> instead of <code>null</code>.
   */
  public static final boolean DO_NULL_CHECK = true;

  /**
   * Flag value for {@link #convertString(String, boolean)} that requests
   * a strict ASCII conversion.
   */
  public static final boolean STRICT_ASCII = true;

  /**
   * Unmodifiable table used for the default (descriptive) conversion.
   */
  private static final Map<Integer, String> DESCRIPTIVE_MAP;

  /**
   * Unmodifiable table used for the strict conversion.
   */
  private static final Map<Integer, String> STRICT_MAP;

  /**
   * Builds both tables from the entries they share plus the entries that
   * are specific to each conversion mode.
   */
  static
  {
    final Map<Integer, String> shared = sharedMappings();

    final Map<Integer, String> descriptive = new HashMap<Integer, String>( shared );
    // Quotes are rendered TeX-style in the descriptive table.
    descriptive.put( 132, "''" );
    descriptive.put( 145, "`" );
    descriptive.put( 147, "``" );
    descriptive.put( 148, "''" );
    // Symbols without an ASCII look-alike are spelled out.
    descriptive.put( 134, "<dagger>" );
    descriptive.put( 135, "<doubledagger>" );
    descriptive.put( 149, "<bullet>" );
    descriptive.put( 153, "<TM>" );
    descriptive.put( 162, "<cents>" );
    descriptive.put( 164, "<currency>" );
    descriptive.put( 165, "<yuan>" );
    descriptive.put( 166, "<broken vertical bar>" );
    descriptive.put( 167, "<section sign>" );
    descriptive.put( 169, "<copyright>" );
    descriptive.put( 170, "<feminine ordinal indicator>" );
    descriptive.put( 172, "<not sign>" );
    descriptive.put( 174, "<registered>" );
    descriptive.put( 176, "<degree>" );
    descriptive.put( 178, "<superscript 2>" );
    descriptive.put( 179, "<superscript 3>" );
    descriptive.put( 181, "<mu>" );
    descriptive.put( 182, "<paragraph>" );
    descriptive.put( 183, "<middle dot>" );
    descriptive.put( 184, "<spacing cedilla>" );
    descriptive.put( 185, "<superscript 1>" );
    descriptive.put( 186, "<masculine ordinal indicator>" );
    DESCRIPTIVE_MAP = Collections.unmodifiableMap( descriptive );

    final Map<Integer, String> strict = new HashMap<Integer, String>( shared );
    // Quotes collapse to the plain ASCII quote characters in strict mode.
    strict.put( 132, "\"" );
    strict.put( 145, "'" );
    strict.put( 147, "\"" );
    strict.put( 148, "\"" );
    STRICT_MAP = Collections.unmodifiableMap( strict );
  }

  /**
   * Private constructor. This class cannot be instantiated.
   */
  private ISOToASCII() {}

  /**
   * <p>Returns the ASCII equivalent for the specified
   * <code>char</code>, looked up in the descriptive table.</p>
   *
   * <p>If no equivalent is registered for the input character (this
   * includes every plain ASCII character), this method returns
   * <code>null</code>. You will need to check the return value in your
   * code.</p>
   *
   * @param input - The character that is to be transformed.
   * @return String - The ASCII equivalent, or <code>null</code> if
   *   there is none.
   */
  public static String getASCIIEquivalent( char input )
  {
    return lookup( DESCRIPTIVE_MAP, input );
  }

  /**
   * <p>Returns the ASCII equivalent for the specified
   * <code>char</code>, looked up in the descriptive table.</p>
   *
   * <p>If no equivalent is registered and <code>checkFlag</code> is
   * {@link #DO_NULL_CHECK DO_NULL_CHECK}, a <code>String</code>
   * representation of the input character is returned. Otherwise the
   * result is the same as {@link #getASCIIEquivalent(char)}, that is
   * <code>null</code> for unmapped characters.</p>
   *
   * @param input - The character that is to be transformed.
   * @param checkFlag - Specify whether a missing equivalent should be
   *   replaced by the input character. To enable this, specify
   *   {@link #DO_NULL_CHECK DO_NULL_CHECK}.
   * @return String - The ASCII equivalent.
   */
  public static String getASCIIEquivalent( char input,
      boolean checkFlag )
  {
    final String equivalent = getASCIIEquivalent( input );
    if ( equivalent == null && checkFlag == DO_NULL_CHECK )
    {
      return String.valueOf( input );
    }

    return equivalent;
  }

  /**
   * <p>Converts every character above 127 that has an entry in the
   * descriptive table to its ASCII replacement and returns the
   * resulting <code>String</code>. Characters without an entry are
   * copied through unchanged.</p>
   *
   * @param inString - The String object that is to be converted.
   * @return String - The converted String object.
   * @throws NullPointerException if <code>inString</code> is
   *   <code>null</code>.
   */
  public static String convertString( String inString )
  {
    return convert( inString, DESCRIPTIVE_MAP, false );
  }

  /**
   * <p>Converts every character above 127 to its ASCII replacement and
   * returns the resulting <code>String</code>.</p>
   *
   * <p>When <code>strict</code> is {@link #STRICT_ASCII STRICT_ASCII}
   * the strict table is used, and characters above 127 that have no
   * entry in it are suppressed (removed from the output). Otherwise
   * this method behaves exactly like
   * {@link #convertString(String)}.</p>
   *
   * @param inString - The String object that is to be converted.
   * @param strict - Specifies whether strict conversion is to be
   *   performed. Use {@link #STRICT_ASCII STRICT_ASCII} to request it.
   * @return String - The converted String object.
   * @throws NullPointerException if <code>inString</code> is
   *   <code>null</code>.
   */
  public static String convertString( String inString, boolean strict )
  {
    if ( strict == STRICT_ASCII )
    {
      return convert( inString, STRICT_MAP, true );
    }

    return convertString( inString );
  }

  /**
   * Shared conversion loop behind both <code>convertString</code>
   * variants.
   *
   * @param inString - The text to convert.
   * @param table - The lookup table to consult for characters above 127.
   * @param dropUnmapped - If <code>true</code>, characters above 127
   *   without a table entry are omitted; if <code>false</code> they are
   *   copied through unchanged.
   * @return String - The converted text.
   */
  private static String convert( String inString,
      Map<Integer, String> table, boolean dropUnmapped )
  {
    final int length = inString.length();
    final StringBuilder converted = new StringBuilder( length );

    for ( int i = 0; i < length; ++i )
    {
      final char current = inString.charAt( i );
      if ( current <= 127 )
      {
        // Plain ASCII is always copied verbatim.
        converted.append( current );
        continue;
      }

      final String equivalent = lookup( table, current );
      if ( equivalent != null )
      {
        converted.append( equivalent );
      }
      else if ( ! dropUnmapped )
      {
        converted.append( current );
      }
    }

    return converted.toString();
  }

  /**
   * Looks up a character in the given table by its numeric code.
   *
   * @param table - The lookup table.
   * @param input - The character to look up.
   * @return String - The registered replacement, or <code>null</code>.
   */
  private static String lookup( Map<Integer, String> table, char input )
  {
    // The explicit widening matters: Map.get takes an Object, so passing
    // the char directly would box it to a Character and never match the
    // Integer keys.
    return table.get( Integer.valueOf( (int) input ) );
  }

  /**
   * Creates a fresh, mutable map holding the entries that are identical
   * in the descriptive and the strict table.
   *
   * @return Map - The shared character mappings.
   */
  private static Map<Integer, String> sharedMappings()
  {
    final Map<Integer, String> shared = new HashMap<Integer, String>();

    // Punctuation and ligatures in the 128-159 range.
    shared.put( 130, "'" );
    shared.put( 133, "..." );
    shared.put( 136, "^" );
    shared.put( 139, "<" );
    shared.put( 140, "OE" );
    shared.put( 146, "'" );
    shared.put( 150, "--" );
    shared.put( 151, "---" );
    shared.put( 152, "~" );
    shared.put( 155, ">" );
    shared.put( 156, "oe" );
    shared.put( 159, "Y" );

    // Symbols in the 160-191 range that have a short ASCII stand-in.
    shared.put( 160, " " );
    shared.put( 161, "!" );
    shared.put( 163, "<pounds>" );
    shared.put( 168, " " );
    shared.put( 171, "<<" );
    shared.put( 173, "-" );
    shared.put( 175, "-" );
    shared.put( 177, "+-" );
    shared.put( 180, "'" );
    shared.put( 187, ">>" );
    shared.put( 188, "1/4" );
    shared.put( 189, "1/2" );
    shared.put( 190, "3/4" );
    shared.put( 191, "?" );

    // Accented letters, 192-255. Codes 208, 222, 240 and 254 are
    // deliberately left without a mapping, as in the original tables.
    putRun( shared, 192, "A", "A", "A", "A", "A", "A", "AE", "C" );    // 192-199
    putRun( shared, 200, "E", "E", "E", "E", "I", "I", "I", "I" );     // 200-207
    putRun( shared, 209, "N", "O", "O", "O", "O", "O", " x ", "O" );   // 209-216
    putRun( shared, 217, "U", "U", "U", "U", "Y" );                    // 217-221
    shared.put( 223, "s" );
    putRun( shared, 224, "a", "a", "a", "a", "a", "a", "ae", "c" );    // 224-231
    putRun( shared, 232, "e", "e", "e", "e", "i", "i", "i", "i" );     // 232-239
    putRun( shared, 241, "n", "o", "o", "o", "o", "o", " / ", "o" );   // 241-248
    putRun( shared, 249, "u", "u", "u", "u", "y" );                    // 249-253
    shared.put( 255, "y" );

    return shared;
  }

  /**
   * Registers replacements for a run of consecutive character codes.
   *
   * @param target - The map to add the entries to.
   * @param firstCode - The character code of the first replacement.
   * @param replacements - The replacements for <code>firstCode</code>,
   *   <code>firstCode + 1</code>, and so on.
   */
  private static void putRun( Map<Integer, String> target, int firstCode,
      String... replacements )
  {
    for ( int offset = 0; offset < replacements.length; ++offset )
    {
      target.put( firstCode + offset, replacements[offset] );
    }
  }
}