001 package org.rakeshv.tex;
002
003 import java.io.*;
004 import java.util.*;
005
/**
 * <p>Provides a static lookup table that maps accented ISO 8859-1
 * characters (codes 160-255), together with the punctuation that
 * Windows Cp1252 places at codes 130-159, to their
 * L<sup><small>A</small></sup>T<sub><small>E</small></sub>X
 * equivalents.</p>
 *
 * <p>See <a href='http://www.w3.org/TR/html4/sgml/entities.html'>ISO Character Entities</a>
 * for a full description of the character entities.</p>
 *
 * <p>The table is filled once by the static initialiser and is only
 * read afterwards. All methods are static; the class cannot be
 * instantiated.</p>
 *
 * <p>Copyright &copy; 2002 Rakesh Vidyadharan</p>
 *
 * @author Rakesh Vidyadharan on 13 January, 2002
 */
public final class ISOToTeX
{
  /**
   * Lookup table indexed by character code. An entry is
   * <code>null</code> when no LaTeX equivalent is defined for that
   * code. Only codes 0-255 can have an entry.
   */
  private static final String[] TABLE = new String[256];

  /**
   * Static initialiser to populate the lookup table. Every value is a
   * self-contained LaTeX fragment with balanced braces, so it can be
   * spliced into running text.
   */
  static
  {
    // Codes 130-159: punctuation and letters as laid out by Cp1252.
    TABLE[130] = "'";
    TABLE[132] = "''";
    TABLE[133] = "{\\dots}";
    TABLE[134] = "{\\dag}";
    TABLE[135] = "{\\ddag}";
    TABLE[136] = "{\\^{ }}";
    TABLE[139] = "<";
    TABLE[140] = "{\\OE}";
    TABLE[145] = "`";
    TABLE[146] = "'";
    TABLE[147] = "``";
    TABLE[148] = "''";
    // \bullet is a math-mode symbol, so it must be wrapped in \ensuremath.
    TABLE[149] = "{\\ensuremath{\\bullet}}";
    TABLE[150] = "{\\textendash}";
    TABLE[151] = "{\\textemdash}";
    TABLE[152] = "{\\~{ }}";
    TABLE[153] = "{\\ensuremath{^\\mathrm{{\\small TM}}}}";
    TABLE[155] = ">";
    TABLE[156] = "{\\oe}";
    TABLE[159] = "\\\"{Y}";

    // Codes 160-191: ISO 8859-1 symbols.
    // Non-breaking space: "~" is LaTeX's unbreakable inter-word space.
    TABLE[160] = "~";
    TABLE[161] = "{!`}";
    TABLE[163] = "{\\pounds}";
    TABLE[167] = "{\\S}";
    TABLE[168] = "\\\"{ }";
    TABLE[169] = "{\\copyright}";
    TABLE[170] = "{\\ensuremath{^{\\tiny \\underline{a}}}}";
    TABLE[171] = "{\\ensuremath{\\ll}}";
    TABLE[172] = "{\\ensuremath{\\neg}}";
    TABLE[173] = "{\\textendash}";
    TABLE[174] = "\\ensuremath{^{\\ooalign{\\hfil\\raise.07ex\\hbox{\\rm\\tiny R}\\hfil\\crcr{\\scriptsize\\mathhexbox20D}}}}";
    TABLE[175] = "{\\ensuremath{^-}}";
    TABLE[176] = "{\\ensuremath{^\\circ}}";
    // Plus-minus, micro and middle dot sit on the text line, not raised.
    TABLE[177] = "{\\ensuremath{\\pm}}";
    TABLE[178] = "{\\ensuremath{^2}}";
    TABLE[179] = "{\\ensuremath{^3}}";
    TABLE[180] = "{\\ensuremath{^\\prime}}";
    TABLE[181] = "{\\ensuremath{\\mu}}";
    TABLE[182] = "{\\P}";
    TABLE[183] = "{\\ensuremath{\\cdot}}";
    TABLE[184] = "{\\c{ }}";
    TABLE[185] = "{\\ensuremath{^1}}";
    TABLE[186] = "{\\ensuremath{^{\\tiny \\underline{o}}}}";
    // Right guillemet: \gg is the counterpart of \ll used for code 171.
    TABLE[187] = "{\\ensuremath{\\gg}}";
    TABLE[188] = "{\\ensuremath{\\frac{1}{4}}}";
    TABLE[189] = "{\\ensuremath{\\frac{1}{2}}}";
    TABLE[190] = "{\\ensuremath{\\frac{3}{4}}}";
    TABLE[191] = "{?`}";

    // Codes 192-223: upper-case accented letters.
    TABLE[192] = "\\`{A}";
    TABLE[193] = "\\'{A}";
    TABLE[194] = "\\^{A}";
    TABLE[195] = "\\~{A}";
    TABLE[196] = "\\\"{A}";
    TABLE[197] = "\\r{A}";
    // AE ligature, written the same way as {\ae}, {\OE} and {\oe}.
    TABLE[198] = "{\\AE}";
    TABLE[199] = "\\c{C}";
    TABLE[200] = "\\`{E}";
    TABLE[201] = "\\'{E}";
    TABLE[202] = "\\^{E}";
    TABLE[203] = "\\\"{E}";
    TABLE[204] = "\\`{I}";
    TABLE[205] = "\\'{I}";
    TABLE[206] = "\\^{I}";
    TABLE[207] = "\\\"{I}";
    TABLE[209] = "\\~{N}";
    TABLE[210] = "\\`{O}";
    TABLE[211] = "\\'{O}";
    TABLE[212] = "\\^{O}";
    TABLE[213] = "\\~{O}";
    TABLE[214] = "\\\"{O}";
    TABLE[215] = "{\\ensuremath{\\times}}";
    TABLE[216] = "{\\O}";
    TABLE[217] = "\\`{U}";
    TABLE[218] = "\\'{U}";
    TABLE[219] = "\\^{U}";
    TABLE[220] = "\\\"{U}";
    TABLE[221] = "\\'{Y}";
    TABLE[223] = "{\\ss}";

    // Codes 224-255: lower-case accented letters.
    TABLE[224] = "\\`{a}";
    TABLE[225] = "\\'{a}";
    TABLE[226] = "\\^{a}";
    TABLE[227] = "\\~{a}";
    TABLE[228] = "\\\"{a}";
    TABLE[229] = "\\r{a}";
    TABLE[230] = "{\\ae}";
    TABLE[231] = "\\c{c}";
    TABLE[232] = "\\`{e}";
    TABLE[233] = "\\'{e}";
    TABLE[234] = "\\^{e}";
    TABLE[235] = "\\\"{e}";
    TABLE[236] = "\\`{i}";
    TABLE[237] = "\\'{i}";
    TABLE[238] = "\\^{i}";
    TABLE[239] = "\\\"{i}";
    TABLE[241] = "\\~{n}";
    TABLE[242] = "\\`{o}";
    TABLE[243] = "\\'{o}";
    TABLE[244] = "\\^{o}";
    TABLE[245] = "\\~{o}";
    TABLE[246] = "\\\"{o}";
    TABLE[247] = "{\\ensuremath{\\div}}";
    TABLE[248] = "{\\o}";
    TABLE[249] = "\\`{u}";
    TABLE[250] = "\\'{u}";
    TABLE[251] = "\\^{u}";
    TABLE[252] = "\\\"{u}";
    TABLE[253] = "\\'{y}";
    TABLE[255] = "\\\"{y}";
  }

  /**
   * Pass this as the <code>checkFlag</code> of
   * {@link #getTeXEquivalent( char, boolean )} to get the input
   * character back as a <code>String</code> when the lookup table
   * holds no equivalent for it.
   */
  public static final boolean DO_NULL_CHECK = true;

  /**
   * Private constructor. This class cannot be instantiated.
   */
  private ISOToTeX() {}

  /**
   * <p>Returns the
   * L<sup><small>A</small></sup>T<sub><small>E</small></sub>X
   * equivalent for the specified <code>char</code>.</p>
   *
   * <p>If no equivalent is defined for the input character, this
   * method returns <code>null</code>. You will have to check
   * the return value, and take appropriate action.</p>
   *
   * @param input - The character that is to be transformed.
   * @return String - The LaTeX equivalent, or <code>null</code>
   *   if none is defined.
   */
  public static String getTeXEquivalent( char input )
  {
    // Characters beyond the table (code > 255) never have an equivalent.
    return ( input < TABLE.length ) ? TABLE[input] : null;
  }

  /**
   * <p>Returns the
   * L<sup><small>A</small></sup>T<sub><small>E</small></sub>X
   * equivalent for the specified <code>char</code>.</p>
   *
   * <p>If no equivalent is defined for the input character, and
   * checkFlag is specified as {@link #DO_NULL_CHECK DO_NULL_CHECK},
   * then a <code>String</code> representation of the input
   * character is returned. Otherwise the result is the same as
   * {@link #getTeXEquivalent( char )}, i.e. possibly
   * <code>null</code>.</p>
   *
   * @param input - The character that is to be transformed.
   * @param checkFlag - Whether a missing equivalent should be
   *   replaced by the input character itself. To enable this,
   *   specify {@link #DO_NULL_CHECK DO_NULL_CHECK}.
   * @return String - The LaTeX equivalent.
   */
  public static String getTeXEquivalent( char input,
      boolean checkFlag )
  {
    String result = getTeXEquivalent( input );
    if ( result == null && checkFlag == DO_NULL_CHECK )
    {
      return String.valueOf( input );
    }

    return result;
  }

  /**
   * <p>Convert all ISO 8859-1 encoded non-ASCII characters to
   * appropriate L<sup>A</sup>T<sub>E</sub>X tags, and return the
   * modified <code>String</code> object.</p>
   *
   * <p>ASCII characters (code 127 and below) are copied unchanged.
   * A non-ASCII character without an equivalent is also copied
   * unchanged, and a diagnostic line is written to
   * <code>System.err</code>.</p>
   *
   * @param inString - The String object that is to be modified.
   *   May be <code>null</code>.
   * @return String - The modified String object, or
   *   <code>null</code> if <code>inString</code> is <code>null</code>.
   */
  public static String convertString( String inString )
  {
    if ( inString == null )
    {
      return null;
    }

    int length = inString.length();
    StringBuffer sbuf = new StringBuffer( length );

    for ( int i = 0; i < length; ++i )
    {
      char currentChar = inString.charAt( i );
      if ( currentChar <= 127 )
      {
        sbuf.append( currentChar );
        continue;
      }

      String equivalent = getTeXEquivalent( currentChar );
      if ( equivalent == null )
      {
        // Diagnostics go to stderr so they cannot mix with converted
        // text that a caller writes to stdout.
        System.err.println( "Current character not matched " + currentChar );
        sbuf.append( currentChar );
      }
      else
      {
        sbuf.append( equivalent );
      }
    }

    return sbuf.toString();
  }

  /**
   * <p>Encodes the input <code>String</code> to bytes in the specified
   * character encoding, re-reads those bytes as
   * <code>ISO 8859-1</code> (so each byte becomes one character with
   * the same code), and then invokes {@link #convertString( String )
   * convertString( String )}.</p>
   *
   * <p>This is what lets single-byte encodings such as Windows Cp1252
   * reach the table entries for codes 130-159.</p>
   *
   * <p>If the encoding is not supported, a message is written to
   * <code>System.err</code> and the input is converted as it is.</p>
   *
   * @param inString - The String object that is to be converted.
   *   May be <code>null</code>.
   * @param encoding - The name of the character encoding whose byte
   *   values should be looked up, e.g. <code>Cp1252</code>.
   * @return String - The modified String object, or
   *   <code>null</code> if <code>inString</code> is <code>null</code>.
   */
  public static String convertString( String inString, String encoding )
  {
    if ( inString == null )
    {
      return null;
    }

    String isoString;

    try
    {
      byte[] byteArray = inString.getBytes( encoding );
      isoString = new String( byteArray, "ISO-8859-1" );
    }
    catch ( UnsupportedEncodingException ueex )
    {
      // Best effort: fall back to converting the string unchanged.
      isoString = inString;
      System.err.println( "The specified encoding " + encoding + " is not a valid encoding scheme." );
    }

    return convertString( isoString );
  }
}