001    package org.rakeshv.tex;
002    
003    import java.io.*;
004    import java.util.*;
005    
/**
 * <p>Maps accented and special 8-bit character codes (128&ndash;255) to their
 * L<sup><small>A</small></sup>T<sub><small>E</small></sub>X equivalents.</p>
 *
 * <p>Codes 160&ndash;255 are the ISO 8859-1 characters; the entries in the
 * 128&ndash;159 range follow the Windows Cp1252 layout (for example 150/151
 * are the en and em dashes).  Codes that have no entry in the table are left
 * untouched by the conversion methods.</p>
 *
 * <p>See <a href='http://www.w3.org/TR/html4/sgml/entities.html'>ISO Character Entities</a>
 * for a full description of the character entities.</p>
 *
 * <p>This is a static utility class and cannot be instantiated.</p>
 *
 * @author Rakesh Vidyadharan on 13 January, 2002
 *
 * <p>Copyright &copy; 2002 Rakesh Vidyadharan</p>
 */
public final class ISOToTeX
{
    /**
     * Pass as <code>checkFlag</code> to
     * {@link #getTeXEquivalent( char, boolean )} to get the input character
     * back as a <code>String</code> when the table holds no mapping for it.
     */
    public static final boolean DO_NULL_CHECK = true;

    /**
     * The table that holds the character mappings, keyed by character code.
     * It is filled once by the static initialiser and only read afterwards.
     */
    private static final Map<Integer, String> map = new HashMap<Integer, String>();

    /**
     * Static initialiser to populate the mapping table.
     */
    static
    {
        // 128-159: Windows Cp1252 punctuation and symbols.
        register( 130, "'" );
        register( 132, "''" );
        register( 133, "{\\dots}" );
        register( 134, "{\\dag}" );
        register( 135, "{\\ddag}" );
        register( 136, "{\\^{ }}" );
        register( 139, "<" );
        register( 140, "{\\OE}" );
        register( 145, "`" );
        register( 146, "'" );
        register( 147, "``" );
        register( 148, "''" );
        // \bullet is a math-mode symbol; wrap it so it is also valid in text mode.
        register( 149, "{\\ensuremath{\\bullet}}" );
        register( 150, "{\\textendash}" );
        register( 151, "{\\textemdash}" );
        register( 152, "{\\~{ }}" );
        register( 153, "{\\ensuremath{^\\mathrm{{\\small TM}}}}" );
        register( 155, ">" );
        register( 156, "{\\oe}" );
        register( 159, "\\\"{Y}" );

        // 160-191: ISO 8859-1 symbols.
        // 160 is the no-break space, which LaTeX spells as the tie "~"
        // (not the tilde accent used for code 152).
        register( 160, "~" );
        register( 161, "{!`}" );
        register( 163, "{\\pounds}" );
        register( 167, "{\\S}" );
        register( 168, "\\\"{ }" );
        register( 169, "{\\copyright}" );
        register( 170, "{\\ensuremath{^{\\tiny \\underline{a}}}}" );
        register( 171, "{\\ensuremath{\\ll}}" );
        register( 172, "{\\ensuremath{\\neg}}" );
        register( 173, "{\\textendash}" );
        register( 174, "\\ensuremath{^{\\ooalign{\\hfil\\raise.07ex\\hbox{\\rm\\tiny R}\\hfil\\crcr{\\scriptsize\\mathhexbox20D}}}}" );
        register( 175, "{\\ensuremath{^-}}" );
        register( 176, "{\\ensuremath{^\\circ}}" );
        register( 177, "{\\ensuremath{^\\pm}}" );
        register( 178, "{\\ensuremath{^2}}" );
        register( 179, "{\\ensuremath{^3}}" );
        register( 180, "{\\ensuremath{^\\prime}}" );
        register( 181, "{\\ensuremath{^\\mu}}" );
        register( 182, "{\\P}" );
        register( 183, "{\\ensuremath{^\\cdot}}" );
        register( 184, "{\\c{ }}" );
        register( 185, "{\\ensuremath{^1}}" );
        register( 186, "{\\ensuremath{^{\\tiny \\underline{o}}}}" );
        // Right guillemet: the counterpart of \ll used for code 171.
        register( 187, "{\\ensuremath{\\gg}}" );
        register( 188, "{\\ensuremath{\\frac{1}{4}}}" );
        register( 189, "{\\ensuremath{\\frac{1}{2}}}" );
        register( 190, "{\\ensuremath{\\frac{3}{4}}}" );
        register( 191, "{?`}" );

        // 192-223: upper-case accented letters.
        register( 192, "\\`{A}" );
        register( 193, "\\'{A}" );
        register( 194, "\\^{A}" );
        register( 195, "\\~{A}" );
        register( 196, "\\\"{A}" );
        register( 197, "\\r{A}" );
        register( 198, "{\\AE}" );
        register( 199, "\\c{C}" );
        register( 200, "\\`{E}" );
        register( 201, "\\'{E}" );
        register( 202, "\\^{E}" );
        register( 203, "\\\"{E}" );
        register( 204, "\\`{I}" );
        register( 205, "\\'{I}" );
        register( 206, "\\^{I}" );
        register( 207, "\\\"{I}" );
        register( 209, "\\~{N}" );
        register( 210, "\\`{O}" );
        register( 211, "\\'{O}" );
        register( 212, "\\^{O}" );
        register( 213, "\\~{O}" );
        register( 214, "\\\"{O}" );
        register( 215, "{\\ensuremath{\\times}}" );
        register( 216, "{\\O}" );
        register( 217, "\\`{U}" );
        register( 218, "\\'{U}" );
        register( 219, "\\^{U}" );
        register( 220, "\\\"{U}" );
        register( 221, "\\'{Y}" );
        register( 223, "{\\ss}" );

        // 224-255: lower-case accented letters.
        register( 224, "\\`{a}" );
        register( 225, "\\'{a}" );
        register( 226, "\\^{a}" );
        register( 227, "\\~{a}" );
        register( 228, "\\\"{a}" );
        register( 229, "\\r{a}" );
        register( 230, "{\\ae}" );
        register( 231, "\\c{c}" );
        register( 232, "\\`{e}" );
        register( 233, "\\'{e}" );
        register( 234, "\\^{e}" );
        register( 235, "\\\"{e}" );
        register( 236, "\\`{i}" );
        register( 237, "\\'{i}" );
        register( 238, "\\^{i}" );
        register( 239, "\\\"{i}" );
        register( 241, "\\~{n}" );
        register( 242, "\\`{o}" );
        register( 243, "\\'{o}" );
        register( 244, "\\^{o}" );
        register( 245, "\\~{o}" );
        register( 246, "\\\"{o}" );
        register( 247, "{\\ensuremath{\\div}}" );
        register( 248, "{\\o}" );
        register( 249, "\\`{u}" );
        register( 250, "\\'{u}" );
        register( 251, "\\^{u}" );
        register( 252, "\\\"{u}" );
        register( 253, "\\'{y}" );
        register( 255, "\\\"{y}" );
    }

    /**
     * Private constructor.  This class cannot be instantiated.
     */
    private ISOToTeX() {}

    /**
     * Adds one entry to the mapping table.
     *
     * @param code - The character code to map.
     * @param tex - The LaTeX text that replaces the character.
     */
    private static void register( int code, String tex )
    {
        map.put( Integer.valueOf( code ), tex );
    }

    /**
     * <p>Returns the
     * L<sup><small>A</small></sup>T<sub><small>E</small></sub>X
     * equivalent for the specified <code>char</code>.</p>
     *
     * <p>If the input character has no entry in the table, this
     * method returns <code>null</code>.  You will have to check
     * the return value, and take appropriate action.</p>
     *
     * @param input - The character that is to be transformed.
     * @return String - The LaTeX equivalent, or <code>null</code> when the
     *         character is not mapped.
     */
    public static String getTeXEquivalent( char input )
    {
        return map.get( Integer.valueOf( input ) );
    }

    /**
     * <p>Returns the
     * L<sup><small>A</small></sup>T<sub><small>E</small></sub>X
     * equivalent for the specified <code>char</code>.</p>
     *
     * <p>If the input character has no entry in the table, and
     * checkFlag is specified as {@link #DO_NULL_CHECK DO_NULL_CHECK},
     * then a <code>String</code> representation of the input
     * character is returned.  With any other flag value the result is the
     * same as {@link #getTeXEquivalent( char )}, i.e. <code>null</code> for
     * an unmapped character.</p>
     *
     * @param input - The character that is to be transformed.
     * @param checkFlag - Specify whether the fallback for unmapped
     *         characters should be applied in the method or not.  To
     *         enable it, specify {@link #DO_NULL_CHECK DO_NULL_CHECK}.
     * @return String - The LaTeX equivalent.
     */
    public static String getTeXEquivalent( char input,
        boolean checkFlag )
    {
        String result = getTeXEquivalent( input );
        if ( result == null && checkFlag == DO_NULL_CHECK )
        {
            return String.valueOf( input );
        }

        return result;
    }

    /**
     * <p>Convert all ISO 8859-1 encoded non-ASCII characters to
     * appropriate L<sup>A</sup>T<sub>E</sub>X tags, and return the
     * modified <code>String</code> object.</p>
     *
     * <p>Characters up to code 127 are copied unchanged.  A character above
     * 127 that has no entry in the table is also copied unchanged, and a
     * diagnostic line is written to <code>System.out</code>.</p>
     *
     * @param inString - The String object that is to be modified.
     * @return String - The modified String object.
     */
    public static String convertString( String inString )
    {
        StringBuilder converted = new StringBuilder( inString.length() );

        for ( int i = 0; i < inString.length(); ++i )
        {
            char currentChar = inString.charAt( i );

            // Plain ASCII never needs translating; skip the table lookup.
            if ( currentChar <= 127 )
            {
                converted.append( currentChar );
                continue;
            }

            String equivalent = getTeXEquivalent( currentChar );
            if ( equivalent == null )
            {
                System.out.println( "Current character not matched " + currentChar );
                converted.append( currentChar );
            }
            else
            {
                converted.append( equivalent );
            }
        }

        return converted.toString();
    }

    /**
     * <p>Encodes the input <code>String</code> to bytes using the specified
     * character encoding, reads those bytes back as <code>ISO 8859-1</code>
     * (so that every byte becomes one character in the range 0&ndash;255),
     * and then invokes {@link #convertString( String )
     * convertString( String )} on the result.</p>
     *
     * <p>If the encoding is not supported, a message is written to
     * <code>System.err</code> and the input string is converted as it
     * is.</p>
     *
     * @param inString - The String object, for example holding text in
     *         Windows Cp1252 format, that is to be converted.
     * @param encoding - The character encoding of input string.
     * @return String - The modified String object.
     */
    public static String convertString( String inString, String encoding )
    {
        String isoString;

        try
        {
            byte[] byteArray = inString.getBytes( encoding );
            isoString = new String( byteArray, "ISO-8859-1" );
        }
        catch ( UnsupportedEncodingException ueex )
        {
            // Best effort: fall back to converting the string unchanged.
            isoString = inString;
            System.err.println( "The specified encoding " + encoding + " is not a valid encoding scheme." );
        }

        return convertString( isoString );
    }
}