001package daikon.config; 002 003import java.util.regex.Matcher; 004import java.util.regex.Pattern; 005import org.checkerframework.checker.regex.qual.Regex; 006 007/** Supplies a static method htmlToTexinfo that converts HTML to Texinfo format. */ 008public class HtmlToTexinfo { 009 010 private static final String lineSep = System.lineSeparator(); 011 012 public static @Regex(1) Pattern javadocAtCode; 013 014 static { 015 // Javadoc actually permits matched braces. Expand this in the future when needed. 016 // javadocAtCode = Pattern.compile("\\{@code ([^{}]*?)\\}"); 017 javadocAtCode = Pattern.compile("\\{@code[ \n]+([^{}]*?(\\{[^{}]*?\\}[^{}]*?)?)\\}"); 018 } 019 020 /** 021 * Converts Javadoc-flavored HTML to Texinfo. 022 * 023 * <p>In particular, handles extra tags that may occur in Javadoc code. 024 */ 025 public static String javadocHtmlToTexinfo(String s) { 026 027 StringBuilder result = new StringBuilder(); 028 int pos = 0; 029 Matcher m = javadocAtCode.matcher(s); 030 while (m.find(pos)) { 031 result.append(htmlToTexinfo(s.substring(pos, m.start()))); 032 result.append("@code{"); 033 String codeText = s.substring(m.start(1), m.end(1)); 034 String codeTextQuoted = codeText.replace("{", "@{").replace("}", "@}"); 035 result.append(codeTextQuoted); 036 result.append("}"); 037 pos = m.end(); 038 } 039 result.append(htmlToTexinfo(s.substring(pos, s.length()))); 040 return result.toString(); 041 } 042 043 /** Converts HTML to Texinfo. */ 044 public static String htmlToTexinfo(String s) { 045 046 // Remove leading spaces, which throw off Info. 047 s = s.replace(lineSep + " ", lineSep); 048 049 s = s.replace("{", "@{"); 050 s = s.replace("}", "@}"); 051 s = s.replaceAll("(@p?x?ref)@\\{(.*)@\\}", "$1{$2}"); 052 s = s.replace("<br>", "@*"); 053 s = s.replace(lineSep + lineSep + "<p>", lineSep + lineSep); 054 s = s.replace("<p>", "@*@*"); 055 // Sadly, Javadoc prohibits the <samp> tag. Use <code> instead. 056 s = s.replace("<samp>", "@samp{"); 057 s = s.replace("</samp>", "}"); 058 s = s.replace("<code>", "@code{"); 059 s = s.replace("</code>", "}"); 060 s = s.replace(lineSep + "<pre>" + lineSep, lineSep + "@example" + lineSep); 061 s = s.replace("<pre>" + lineSep, lineSep + "@example" + lineSep); 062 s = s.replace(lineSep + "<pre>", lineSep + "@example" + lineSep); 063 s = s.replace("<pre>", lineSep + "@example" + lineSep); 064 s = s.replace(lineSep + "</pre>" + lineSep, lineSep + "@end example" + lineSep); 065 s = s.replace("</pre>" + lineSep, lineSep + "@end example" + lineSep); 066 s = s.replace(lineSep + "</pre>", lineSep + "@end example" + lineSep); 067 s = s.replace("</pre>", lineSep + "@end example" + lineSep); 068 // Catch-all for parameters, filenames, etc. for which there is no specific HTML formatting. 069 // But Javadoc should use <code>...</code> rather than <tt>. 070 s = s.replace("<tt>", "@code{"); 071 s = s.replace("</tt>", "}"); 072 073 // Other HTML formatting to handle in the future 074 // BLOCKQUOTE, "\n\n", "" 075 // LI, "\n@item ", "" 076 // UL, "\n\n@itemize @bullet\n", "\n@end itemize\n" 077 // OL, "\n\n@itemize @bullet\n", "\n@end itemize\n" 078 // MENU, "\n\n@itemize @bullet\n", "\n@end itemize\n" 079 // DIR, "\n\n@itemize @bullet\n", "\n@end itemize\n" 080 // H1, "\n\n@section ", "\n" 081 // H2, "\n\n@section ", "\n" 082 // H3, "\n\n@section ", "\n" 083 // H4, "\n\n@section ", "\n" 084 // A, "", "" 085 // SUP, "^", "" 086 087 s = s.replace(">", ">"); 088 s = s.replace("≥", ">="); 089 s = s.replace("<", "<"); 090 s = s.replace("≤", "<="); 091 s = s.replace("⇒", "->"); 092 093 // & must come last 094 s = s.replace("&", "&"); 095 096 return s; 097 } 098}