001package daikon; 002 003import static java.nio.charset.StandardCharsets.UTF_8; 004 005import java.io.BufferedReader; 006import java.io.BufferedWriter; 007import java.io.FileInputStream; 008import java.io.FileOutputStream; 009import java.io.IOException; 010import java.io.InputStream; 011import java.io.InputStreamReader; 012import java.io.OutputStream; 013import java.io.OutputStreamWriter; 014import java.util.ArrayList; 015import java.util.Enumeration; 016import java.util.HashSet; 017import java.util.zip.GZIPInputStream; 018import java.util.zip.GZIPOutputStream; 019import java.util.zip.ZipEntry; 020import java.util.zip.ZipFile; 021import org.checkerframework.checker.mustcall.qual.Owning; 022import org.checkerframework.dataflow.qual.Pure; 023 024/** 025 * Takes one argument: a .dtrace or dtrace.gz file. Splits it into 100 files: the first file 026 * contains the first 1% of the original file, the second contains 1-2%, ... until the last one 027 * contains 99-100%. 028 */ 029public final class SplitDtrace { 030 /** 031 * Entry point for SplitDtrace, which splits a trace file into 100 parts. 032 * 033 * @param args one argument, the name of the .dtrace or .dtrace.gz file 034 */ 035 public static void main(String[] args) throws IOException { 036 if (args.length != 1) { 037 throw new RuntimeException( 038 "You must supply one argument which is the filename of the dtrace file"); 039 } 040 String filename = args[0].trim(); 041 boolean isGz = filename.endsWith(".dtrace.gz"); 042 if (!filename.endsWith(".dtrace") && !isGz) { 043 throw new RuntimeException( 044 "Filename must end with .dtrace or .dtrace.gz: filename=" + filename); 045 } 046 int declNum = 1; 047 int recNum = 0; 048 try (BufferedReader reader = getStream(filename)) { 049 ArrayList<String> rec = new ArrayList<>(); 050 while (true) { 051 readRec(reader, rec); 052 if (isDeclare(rec)) { 053 break; 054 } 055 } 056 while (true) { 057 readRec(reader, rec); 058 if (rec.size() == 0) { 059 break; 060 } 061 if (isDeclare(rec)) { 062 declNum++; 063 } else { 064 recNum++; 065 } 066 } 067 } 068 069 System.out.println( 070 "Number of DECLARE statements: " + declNum + " and number of records is: " + recNum); 071 072 // DecimalFormat formatter = new DecimalFormat("000"); 073 // for (int i = 1; i<=100; i++) writeDtrace(filename, formatter.format(i), 0, 2+recNum*i/200); 074 writeDtrace(filename, "second-half", recNum / 2, 2 + recNum); 075 } 076 077 private static void writeDtrace(String filename, String out_name, int fromRec, int toRec) 078 throws IOException { 079 String out = filename.replace(".dtrace", "." + out_name + ".dtrace"); 080 System.out.println("Writing file " + out); 081 try (FileOutputStream fos = new FileOutputStream(out); 082 OutputStream output = filename.endsWith(".dtrace.gz") ? new GZIPOutputStream(fos) : fos; 083 BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(output, UTF_8)); 084 BufferedReader reader = getStream(filename)) { 085 086 int currRecCount = 0; 087 HashSet<Integer> nonceSet = new HashSet<>(); 088 ArrayList<String> rec = new ArrayList<>(); 089 while (true) { 090 readRec(reader, rec); 091 if (isDeclare(rec)) { 092 writer.newLine(); 093 } 094 writeRec(writer, rec); 095 if (isDeclare(rec)) { 096 break; 097 } 098 } 099 while (true) { 100 readRec(reader, rec); 101 if (rec.size() == 0) { 102 break; 103 } 104 boolean isDecl = isDeclare(rec); 105 if ((currRecCount >= fromRec || isDecl) && currRecCount <= toRec) { 106 boolean shouldWrite = true; 107 if (!isDecl) { 108 int nonce = getNonce(rec); 109 if (isEnter(rec)) { 110 nonceSet.add(nonce); 111 } else { 112 if (!isExit(rec)) { 113 throw new RuntimeException("Must be either ENTER or EXIT:" + rec); 114 } 115 if (!nonceSet.contains(nonce)) { 116 shouldWrite = false; 117 } 118 nonceSet.remove(nonce); 119 } 120 } 121 if (shouldWrite) { 122 writeRec(writer, rec); 123 } 124 } 125 if (!isDecl) { 126 currRecCount++; 127 } 128 } 129 } 130 } 131 132 static int getNonce(ArrayList<String> res) { 133 for (int i = 0; i < res.size(); i++) { 134 if (res.get(i).equals("this_invocation_nonce")) { 135 return Integer.parseInt(res.get(i + 1)); 136 } 137 } 138 throw new RuntimeException("no nonce: " + res); 139 } 140 141 @Pure 142 static boolean isEnter(ArrayList<String> res) { 143 return res.get(0).contains(":::ENTER"); 144 } 145 146 @Pure 147 static boolean isExit(ArrayList<String> res) { 148 return res.get(0).contains(":::EXIT"); 149 } 150 151 @Pure 152 static boolean isDeclare(ArrayList<String> res) { 153 return res.get(0).equals("DECLARE"); 154 } 155 156 static void writeRec(BufferedWriter writer, ArrayList<String> res) throws IOException { 157 for (String s : res) { 158 writer.write(s); 159 writer.newLine(); 160 } 161 writer.newLine(); 162 } 163 164 @SuppressWarnings( 165 "all:purity") // non-deterministic call to trim is used only for equals(), does not affect 166 // result 167 @Pure 168 static boolean isEmpty(String l) { 169 return l.trim().equals("") || l.startsWith("#"); 170 } 171 172 static void readRec(BufferedReader reader, ArrayList<String> res) throws IOException { 173 res.clear(); 174 String line; 175 while ((line = reader.readLine()) != null) { 176 if (!isEmpty(line)) { 177 break; 178 } 179 } // eat white space 180 while (line != null) { 181 line = line.trim(); 182 if (isEmpty(line)) { 183 break; 184 } 185 res.add(line.trim()); 186 line = reader.readLine(); 187 } 188 } 189 190 @SuppressWarnings({ 191 "JdkObsolete", // ZipFile uses Enumeration 192 "builder:required.method.not.called" // @MustCall flows through an enumeration 193 }) 194 static @Owning BufferedReader getStream(String filename) throws IOException { 195 InputStream stream = null; // dummy initialization for compiler's definite assignment check 196 ZipFile zipfile = null; // declare outside try so that it can be closed if an exception occurs 197 try { 198 if (filename.endsWith(".dtrace.zip")) { 199 zipfile = new ZipFile(filename); 200 Enumeration<? extends ZipEntry> e = zipfile.entries(); 201 if (!e.hasMoreElements()) { 202 throw new RuntimeException("No entries in the gz"); 203 } 204 ZipEntry entry = e.nextElement(); 205 if (e.hasMoreElements()) { 206 throw new RuntimeException("More than one entry in the gz"); 207 } 208 stream = zipfile.getInputStream(entry); 209 assert stream != null : "@AssumeAssertion(nullness): just tested that one entry exists"; 210 } else { 211 stream = new FileInputStream(filename); 212 if (filename.endsWith(".dtrace.gz")) { 213 stream = new GZIPInputStream(stream); 214 } 215 } 216 } catch (IOException e) { 217 if (zipfile != null) { 218 try { 219 zipfile.close(); 220 } catch (IOException e2) { 221 // do nothing 222 } 223 } 224 if (stream != null) { 225 try { 226 stream.close(); 227 } catch (IOException e2) { 228 // do nothing 229 } 230 } 231 throw e; 232 } 233 return new BufferedReader(new InputStreamReader(stream, "ISO-8859-1")); 234 } 235}