package daikon;

import static java.nio.charset.StandardCharsets.ISO_8859_1;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import org.checkerframework.checker.mustcall.qual.Owning;
import org.checkerframework.dataflow.qual.Pure;

/**
 * Takes one argument: a .dtrace or .dtrace.gz file. Writes a new trace file, next to the original,
 * whose name has {@code .second-half} inserted before the {@code .dtrace} extension. The new file
 * contains everything up to and including the first DECLARE record, every later DECLARE record,
 * and the data records from the midpoint of the trace onward. An exit record is omitted when the
 * enter record with the same nonce was not written.
 *
 * <p>Splitting the trace into 100 cumulative files (the first 1%, the first 2%, ...) is currently
 * disabled; see the commented-out code in {@link #main}.
 */
public final class SplitDtrace {
  /**
   * Entry point for SplitDtrace, which writes the second half of a trace file to a new file.
   *
   * @param args one argument, the name of the .dtrace or .dtrace.gz file
   * @throws IOException if there is a problem reading the trace or writing the output
   */
  public static void main(String[] args) throws IOException {
    if (args.length != 1) {
      throw new RuntimeException(
          "You must supply one argument which is the filename of the dtrace file");
    }
    String filename = args[0].trim();
    boolean isGz = filename.endsWith(".dtrace.gz");
    if (!filename.endsWith(".dtrace") && !isGz) {
      throw new RuntimeException(
          "Filename must end with .dtrace or .dtrace.gz: filename=" + filename);
    }

    // First pass: count the records so that the midpoint can be computed.
    int declNum = 1; // the first DECLARE is consumed by the header loop below
    int recNum = 0;
    try (BufferedReader reader = getStream(filename)) {
      List<String> rec = new ArrayList<>();
      // Skip the header, that is, everything before the first DECLARE.
      do {
        readRec(reader, rec);
        if (rec.isEmpty()) {
          // End of file: without this check the loop would index into an empty record.
          throw new RuntimeException("No DECLARE record found in " + filename);
        }
      } while (!isDeclare(rec));
      // An empty record means end of file.
      for (readRec(reader, rec); !rec.isEmpty(); readRec(reader, rec)) {
        if (isDeclare(rec)) {
          declNum++;
        } else {
          recNum++;
        }
      }
    }

    System.out.println(
        "Number of DECLARE statements: " + declNum + " and number of records is: " + recNum);

    // Disabled: split into 100 cumulative files instead of writing only the second half.
    // DecimalFormat formatter = new DecimalFormat("000");
    // for (int i = 1; i<=100; i++) writeDtrace(filename, formatter.format(i), 0, 2+recNum*i/200);
    writeDtrace(filename, "second-half", recNum / 2, 2 + recNum);
  }

  /**
   * Returns the name of the output file: {@code filename} with {@code "." + outName} inserted
   * before its last {@code .dtrace}. Only the last occurrence is rewritten, so a directory whose
   * name contains ".dtrace" is left alone.
   *
   * @param filename the name of the input trace file
   * @param outName the tag to insert, without a leading period
   * @return the name of the output file
   * @throws IllegalArgumentException if {@code filename} does not contain ".dtrace"
   */
  static String outputFilename(String filename, String outName) {
    int extStart = filename.lastIndexOf(".dtrace");
    if (extStart < 0) {
      // Returning the name unchanged would make the output overwrite the input.
      throw new IllegalArgumentException("Filename does not contain .dtrace: " + filename);
    }
    return filename.substring(0, extStart) + "." + outName + filename.substring(extStart);
  }

  /**
   * Copies part of a trace file to a new file. Everything up to and including the first DECLARE
   * record is always copied. After that, data records (non-DECLARE records) are numbered from 0:
   * a data record is copied if its number is in [fromRec, toRec], and a DECLARE record is copied
   * if the number of data records seen so far is at most toRec. An exit record is skipped if no
   * enter record with the same nonce has been copied.
   *
   * @param filename the input trace file; the output is gzipped iff this ends with ".dtrace.gz"
   * @param out_name the tag inserted before ".dtrace" to form the output file name
   * @param fromRec the number of the first data record to copy
   * @param toRec the number of the last data record to copy
   * @throws IOException if there is a problem reading or writing
   */
  private static void writeDtrace(String filename, String out_name, int fromRec, int toRec)
      throws IOException {
    String out = outputFilename(filename, out_name);
    System.out.println("Writing file " + out);
    // The writer uses the same charset as getStream (ISO-8859-1, which maps every byte to one
    // char and back), so that the copied records are byte-for-byte identical to the input.
    try (FileOutputStream fos = new FileOutputStream(out);
        OutputStream output = filename.endsWith(".dtrace.gz") ? new GZIPOutputStream(fos) : fos;
        BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(output, ISO_8859_1));
        BufferedReader reader = getStream(filename)) {

      int currRecCount = 0;
      // Nonces of the enter records that were written and whose exit has not yet been seen.
      Set<Integer> nonceSet = new HashSet<>();
      List<String> rec = new ArrayList<>();

      // Copy the header and the first DECLARE, which is preceded by an extra blank line.
      while (true) {
        readRec(reader, rec);
        if (rec.isEmpty()) {
          throw new RuntimeException("No DECLARE record found in " + filename);
        }
        boolean firstDeclare = isDeclare(rec);
        if (firstDeclare) {
          writer.newLine();
        }
        writeRec(writer, rec);
        if (firstDeclare) {
          break;
        }
      }

      // An empty record means end of file.
      for (readRec(reader, rec); !rec.isEmpty(); readRec(reader, rec)) {
        boolean isDecl = isDeclare(rec);
        if ((currRecCount >= fromRec || isDecl) && currRecCount <= toRec) {
          boolean shouldWrite = true;
          if (!isDecl) {
            int nonce = getNonce(rec);
            if (isEnter(rec)) {
              nonceSet.add(nonce);
            } else {
              if (!isExit(rec)) {
                throw new RuntimeException("Must be either ENTER or EXIT:" + rec);
              }
              // Write the exit only if its enter was written; either way forget the nonce.
              shouldWrite = nonceSet.remove(nonce);
            }
          }
          if (shouldWrite) {
            writeRec(writer, rec);
          }
        }
        if (!isDecl) {
          currRecCount++;
        }
      }
    }
  }

  /**
   * Returns the value of the nonce variable in the ppt.
   *
   * @param res the lines of a .dtrace record
   * @return the value of the nonce variable, which is the line after "this_invocation_nonce"
   * @throws RuntimeException if the record has no "this_invocation_nonce" line followed by a
   *     value; a {@link NumberFormatException} if that value is not an integer
   */
  static int getNonce(List<String> res) {
    // Stop one short of the end: the value is on the line after the label.
    for (int i = 0; i + 1 < res.size(); i++) {
      if (res.get(i).equals("this_invocation_nonce")) {
        return Integer.parseInt(res.get(i + 1));
      }
    }
    throw new RuntimeException("no nonce: " + res);
  }

  /**
   * Returns true if the first line of the given record contains ":::ENTER".
   *
   * @param res the lines of a .dtrace record
   * @return true if the record is non-empty and its first line contains ":::ENTER"
   */
  @Pure
  static boolean isEnter(List<String> res) {
    return !res.isEmpty() && res.get(0).contains(":::ENTER");
  }

  /**
   * Returns true if the first line of the given record contains ":::EXIT".
   *
   * @param res the lines of a .dtrace record
   * @return true if the record is non-empty and its first line contains ":::EXIT"
   */
  @Pure
  static boolean isExit(List<String> res) {
    return !res.isEmpty() && res.get(0).contains(":::EXIT");
  }

  /**
   * Returns true if the given record is a program point declaration.
   *
   * @param res the lines of a record from a .decls or .dtrace file
   * @return true if the record is non-empty and its first line is "DECLARE"
   */
  @Pure
  static boolean isDeclare(List<String> res) {
    return !res.isEmpty() && res.get(0).equals("DECLARE");
  }

  /**
   * Writes a .dtrace record to a file, followed by a blank line.
   *
   * @param writer the BufferedWriter to use for output
   * @param res the lines of a .dtrace record
   * @throws IOException if there is a problem writing
   */
  static void writeRec(BufferedWriter writer, List<String> res) throws IOException {
    for (String s : res) {
      writer.write(s);
      writer.newLine();
    }
    writer.newLine();
  }

  /**
   * Returns true if the given line separates records: it is blank, or it is a comment, meaning
   * that its first non-whitespace character is "#".
   *
   * @param l a line from a .dtrace file
   * @return true if the line is blank or a comment
   */
  @SuppressWarnings(
      "all:purity") // non-deterministic call to trim is used only for equals(), does not affect
  // result
  @Pure
  static boolean isEmpty(String l) {
    // Trim before testing for "#".  readRec stores and tests trimmed lines, so an indented comment
    // must be classified the same way here, or it is read as a spurious empty record, which
    // callers take to mean end of file.
    String trimmed = l.trim();
    return trimmed.equals("") || trimmed.startsWith("#");
  }

  /**
   * Reads a record from a .dtrace file. A record is a maximal run of consecutive lines that are
   * neither blank nor comments. Each line is trimmed before being stored. At end of file, the
   * list is left empty.
   *
   * <p>This method modifies a list argument rather than returning a new list, for efficiency.
   *
   * @param reader the BufferedReader to use for input
   * @param res a list that will be filled with the lines of a .dtrace record
   * @throws IOException if there is a problem reading
   */
  static void readRec(BufferedReader reader, List<String> res) throws IOException {
    res.clear();
    String line = reader.readLine();
    // Eat blank lines and comments that precede the record.
    while (line != null && isEmpty(line)) {
      line = reader.readLine();
    }
    // Collect lines until a blank line, a comment, or end of file.
    while (line != null && !isEmpty(line)) {
      res.add(line.trim());
      line = reader.readLine();
    }
  }

  /**
   * Opens a trace file for reading. A file whose name ends with ".dtrace.zip" must be a zip
   * archive with exactly one entry, which is read. A file whose name ends with ".dtrace.gz" is
   * gunzipped. Any other file is read as is. The bytes are decoded as ISO-8859-1.
   *
   * <p>NOTE(review): in the zip case, the ZipFile itself does not appear to be closed when the
   * returned reader is closed; that seems to be what the warning suppression below covers.
   *
   * @param filename the name of the trace file
   * @return a reader for the contents of the trace file; the caller must close it
   * @throws IOException if there is a problem opening the file
   * @throws RuntimeException if a zip archive does not contain exactly one entry
   */
  @SuppressWarnings({
    "JdkObsolete", // ZipFile uses Enumeration
    "builder:required.method.not.called" // @MustCall flows through an enumeration
  })
  static @Owning BufferedReader getStream(String filename) throws IOException {
    InputStream stream = null; // dummy initialization for compiler's definite assignment check
    ZipFile zipfile = null; // declare outside try so that it can be closed if an exception occurs
    try {
      if (filename.endsWith(".dtrace.zip")) {
        zipfile = new ZipFile(filename);
        Enumeration<? extends ZipEntry> e = zipfile.entries();
        if (!e.hasMoreElements()) {
          throw new RuntimeException("No entries in the zip file " + filename);
        }
        ZipEntry entry = e.nextElement();
        if (e.hasMoreElements()) {
          throw new RuntimeException("More than one entry in the zip file " + filename);
        }
        stream = zipfile.getInputStream(entry);
        assert stream != null : "@AssumeAssertion(nullness): just tested that one entry exists";
      } else {
        stream = new FileInputStream(filename);
        if (filename.endsWith(".dtrace.gz")) {
          stream = new GZIPInputStream(stream);
        }
      }
    } catch (IOException | RuntimeException e) {
      // Also clean up on RuntimeException, which the entry-count checks above throw after the
      // ZipFile has been opened.
      if (zipfile != null) {
        try {
          zipfile.close();
        } catch (IOException ignored) {
          // Best-effort cleanup; the original exception is rethrown below.
        }
      }
      if (stream != null) {
        try {
          stream.close();
        } catch (IOException ignored) {
          // Best-effort cleanup; the original exception is rethrown below.
        }
      }
      throw e;
    }
    return new BufferedReader(new InputStreamReader(stream, ISO_8859_1));
  }
}