001package daikon;
002
003import static java.nio.charset.StandardCharsets.UTF_8;
004
005import java.io.BufferedReader;
006import java.io.BufferedWriter;
007import java.io.FileInputStream;
008import java.io.FileOutputStream;
009import java.io.IOException;
010import java.io.InputStream;
011import java.io.InputStreamReader;
012import java.io.OutputStream;
013import java.io.OutputStreamWriter;
014import java.util.ArrayList;
015import java.util.Enumeration;
016import java.util.HashSet;
017import java.util.zip.GZIPInputStream;
018import java.util.zip.GZIPOutputStream;
019import java.util.zip.ZipEntry;
020import java.util.zip.ZipFile;
021import org.checkerframework.checker.mustcall.qual.Owning;
022import org.checkerframework.dataflow.qual.Pure;
023
024/**
025 * Takes one argument: a .dtrace or dtrace.gz file. Splits it into 100 files: the first file
026 * contains the first 1% of the original file, the second contains 1-2%, ... until the last one
027 * contains 99-100%.
028 */
029public final class SplitDtrace {
030  /**
031   * Entry point for SplitDtrace, which splits a trace file into 100 parts.
032   *
033   * @param args one argument, the name of the .dtrace or .dtrace.gz file
034   */
035  public static void main(String[] args) throws IOException {
036    if (args.length != 1) {
037      throw new RuntimeException(
038          "You must supply one argument which is the filename of the dtrace file");
039    }
040    String filename = args[0].trim();
041    boolean isGz = filename.endsWith(".dtrace.gz");
042    if (!filename.endsWith(".dtrace") && !isGz) {
043      throw new RuntimeException(
044          "Filename must end with .dtrace or .dtrace.gz: filename=" + filename);
045    }
046    int declNum = 1;
047    int recNum = 0;
048    try (BufferedReader reader = getStream(filename)) {
049      ArrayList<String> rec = new ArrayList<>();
050      while (true) {
051        readRec(reader, rec);
052        if (isDeclare(rec)) {
053          break;
054        }
055      }
056      while (true) {
057        readRec(reader, rec);
058        if (rec.size() == 0) {
059          break;
060        }
061        if (isDeclare(rec)) {
062          declNum++;
063        } else {
064          recNum++;
065        }
066      }
067    }
068
069    System.out.println(
070        "Number of DECLARE statements: " + declNum + " and number of records is: " + recNum);
071
072    // DecimalFormat formatter = new DecimalFormat("000");
073    // for (int i = 1; i<=100; i++) writeDtrace(filename, formatter.format(i), 0, 2+recNum*i/200);
074    writeDtrace(filename, "second-half", recNum / 2, 2 + recNum);
075  }
076
077  private static void writeDtrace(String filename, String out_name, int fromRec, int toRec)
078      throws IOException {
079    String out = filename.replace(".dtrace", "." + out_name + ".dtrace");
080    System.out.println("Writing file " + out);
081    try (FileOutputStream fos = new FileOutputStream(out);
082        OutputStream output = filename.endsWith(".dtrace.gz") ? new GZIPOutputStream(fos) : fos;
083        BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(output, UTF_8));
084        BufferedReader reader = getStream(filename)) {
085
086      int currRecCount = 0;
087      HashSet<Integer> nonceSet = new HashSet<>();
088      ArrayList<String> rec = new ArrayList<>();
089      while (true) {
090        readRec(reader, rec);
091        if (isDeclare(rec)) {
092          writer.newLine();
093        }
094        writeRec(writer, rec);
095        if (isDeclare(rec)) {
096          break;
097        }
098      }
099      while (true) {
100        readRec(reader, rec);
101        if (rec.size() == 0) {
102          break;
103        }
104        boolean isDecl = isDeclare(rec);
105        if ((currRecCount >= fromRec || isDecl) && currRecCount <= toRec) {
106          boolean shouldWrite = true;
107          if (!isDecl) {
108            int nonce = getNonce(rec);
109            if (isEnter(rec)) {
110              nonceSet.add(nonce);
111            } else {
112              if (!isExit(rec)) {
113                throw new RuntimeException("Must be either ENTER or EXIT:" + rec);
114              }
115              if (!nonceSet.contains(nonce)) {
116                shouldWrite = false;
117              }
118              nonceSet.remove(nonce);
119            }
120          }
121          if (shouldWrite) {
122            writeRec(writer, rec);
123          }
124        }
125        if (!isDecl) {
126          currRecCount++;
127        }
128      }
129    }
130  }
131
132  static int getNonce(ArrayList<String> res) {
133    for (int i = 0; i < res.size(); i++) {
134      if (res.get(i).equals("this_invocation_nonce")) {
135        return Integer.parseInt(res.get(i + 1));
136      }
137    }
138    throw new RuntimeException("no nonce: " + res);
139  }
140
141  @Pure
142  static boolean isEnter(ArrayList<String> res) {
143    return res.get(0).contains(":::ENTER");
144  }
145
146  @Pure
147  static boolean isExit(ArrayList<String> res) {
148    return res.get(0).contains(":::EXIT");
149  }
150
151  @Pure
152  static boolean isDeclare(ArrayList<String> res) {
153    return res.get(0).equals("DECLARE");
154  }
155
156  static void writeRec(BufferedWriter writer, ArrayList<String> res) throws IOException {
157    for (String s : res) {
158      writer.write(s);
159      writer.newLine();
160    }
161    writer.newLine();
162  }
163
164  @SuppressWarnings(
165      "all:purity") // non-deterministic call to trim is used only for equals(), does not affect
166  // result
167  @Pure
168  static boolean isEmpty(String l) {
169    return l.trim().equals("") || l.startsWith("#");
170  }
171
172  static void readRec(BufferedReader reader, ArrayList<String> res) throws IOException {
173    res.clear();
174    String line;
175    while ((line = reader.readLine()) != null) {
176      if (!isEmpty(line)) {
177        break;
178      }
179    } // eat white space
180    while (line != null) {
181      line = line.trim();
182      if (isEmpty(line)) {
183        break;
184      }
185      res.add(line.trim());
186      line = reader.readLine();
187    }
188  }
189
190  @SuppressWarnings({
191    "JdkObsolete", // ZipFile uses Enumeration
192    "builder:required.method.not.called" // @MustCall flows through an enumeration
193  })
194  static @Owning BufferedReader getStream(String filename) throws IOException {
195    InputStream stream = null; // dummy initialization for compiler's definite assignment check
196    ZipFile zipfile = null; // declare outside try so that it can be closed if an exception occurs
197    try {
198      if (filename.endsWith(".dtrace.zip")) {
199        zipfile = new ZipFile(filename);
200        Enumeration<? extends ZipEntry> e = zipfile.entries();
201        if (!e.hasMoreElements()) {
202          throw new RuntimeException("No entries in the gz");
203        }
204        ZipEntry entry = e.nextElement();
205        if (e.hasMoreElements()) {
206          throw new RuntimeException("More than one entry in the gz");
207        }
208        stream = zipfile.getInputStream(entry);
209        assert stream != null : "@AssumeAssertion(nullness): just tested that one entry exists";
210      } else {
211        stream = new FileInputStream(filename);
212        if (filename.endsWith(".dtrace.gz")) {
213          stream = new GZIPInputStream(stream);
214        }
215      }
216    } catch (IOException e) {
217      if (zipfile != null) {
218        try {
219          zipfile.close();
220        } catch (IOException e2) {
221          // do nothing
222        }
223      }
224      if (stream != null) {
225        try {
226          stream.close();
227        } catch (IOException e2) {
228          // do nothing
229        }
230      }
231      throw e;
232    }
233    return new BufferedReader(new InputStreamReader(stream, "ISO-8859-1"));
234  }
235}