001package daikon.config;
002
003import java.util.regex.Matcher;
004import java.util.regex.Pattern;
005import org.checkerframework.checker.regex.qual.Regex;
006
007/** Supplies a static method htmlToTexinfo that converts HTML to Texinfo format. */
008public class HtmlToTexinfo {
009
010  private static final String lineSep = System.lineSeparator();
011
012  public static @Regex(1) Pattern javadocAtCode;
013
014  static {
015    // Javadoc actually permits matched braces.  Expand this in the future when needed.
016    // javadocAtCode = Pattern.compile("\\{@code ([^{}]*?)\\}");
017    javadocAtCode = Pattern.compile("\\{@code[ \n]+([^{}]*?(\\{[^{}]*?\\}[^{}]*?)?)\\}");
018  }
019
020  /**
021   * Converts Javadoc-flavored HTML to Texinfo.
022   *
023   * <p>In particular, handles extra tags that may occur in Javadoc code.
024   */
025  public static String javadocHtmlToTexinfo(String s) {
026
027    StringBuilder result = new StringBuilder();
028    int pos = 0;
029    Matcher m = javadocAtCode.matcher(s);
030    while (m.find(pos)) {
031      result.append(htmlToTexinfo(s.substring(pos, m.start())));
032      result.append("@code{");
033      String codeText = s.substring(m.start(1), m.end(1));
034      String codeTextQuoted = codeText.replace("{", "@{").replace("}", "@}");
035      result.append(codeTextQuoted);
036      result.append("}");
037      pos = m.end();
038    }
039    result.append(htmlToTexinfo(s.substring(pos, s.length())));
040    return result.toString();
041  }
042
043  /** Converts HTML to Texinfo. */
044  public static String htmlToTexinfo(String s) {
045
046    // Remove leading spaces, which throw off Info.
047    s = s.replace(lineSep + " ", lineSep);
048
049    s = s.replace("{", "@{");
050    s = s.replace("}", "@}");
051    s = s.replaceAll("(@p?x?ref)@\\{(.*)@\\}", "$1{$2}");
052    s = s.replace("<br>", "@*");
053    s = s.replace(lineSep + lineSep + "<p>", lineSep + lineSep);
054    s = s.replace("<p>", "@*@*");
055    // Sadly, Javadoc prohibits the <samp> tag.  Use <code> instead.
056    s = s.replace("<samp>", "@samp{");
057    s = s.replace("</samp>", "}");
058    s = s.replace("<code>", "@code{");
059    s = s.replace("</code>", "}");
060    s = s.replace(lineSep + "<pre>" + lineSep, lineSep + "@example" + lineSep);
061    s = s.replace("<pre>" + lineSep, lineSep + "@example" + lineSep);
062    s = s.replace(lineSep + "<pre>", lineSep + "@example" + lineSep);
063    s = s.replace("<pre>", lineSep + "@example" + lineSep);
064    s = s.replace(lineSep + "</pre>" + lineSep, lineSep + "@end example" + lineSep);
065    s = s.replace("</pre>" + lineSep, lineSep + "@end example" + lineSep);
066    s = s.replace(lineSep + "</pre>", lineSep + "@end example" + lineSep);
067    s = s.replace("</pre>", lineSep + "@end example" + lineSep);
068    // Catch-all for parameters, filenames, etc. for which there is no specific HTML formatting.
069    // But Javadoc should use <code>...</code> rather than <tt>.
070    s = s.replace("<tt>", "@code{");
071    s = s.replace("</tt>", "}");
072
073    // Other HTML formatting to handle in the future
074    // BLOCKQUOTE, "\n\n",        ""
075    // LI,         "\n@item ",    ""
076    // UL,         "\n\n@itemize @bullet\n", "\n@end itemize\n"
077    // OL,         "\n\n@itemize @bullet\n", "\n@end itemize\n"
078    // MENU,       "\n\n@itemize @bullet\n", "\n@end itemize\n"
079    // DIR,        "\n\n@itemize @bullet\n", "\n@end itemize\n"
080    // H1,         "\n\n@section ",  "\n"
081    // H2,         "\n\n@section ",  "\n"
082    // H3,         "\n\n@section ",  "\n"
083    // H4,         "\n\n@section ",  "\n"
084    // A,          "", ""
085    // SUP,        "^", ""
086
087    s = s.replace("&gt;", ">");
088    s = s.replace("&ge;", ">=");
089    s = s.replace("&lt;", "<");
090    s = s.replace("&le;", "<=");
091    s = s.replace("&rArr;", "->");
092
093    // &amp must come last
094    s = s.replace("&amp;", "&");
095
096    return s;
097  }
098}