001package daikon.tools;
002
003import static java.nio.charset.StandardCharsets.UTF_8;
004import static java.util.logging.Level.FINE;
005import static java.util.logging.Level.INFO;
006
007import daikon.Daikon;
008import daikon.FileIO;
009import daikon.Global;
010import daikon.Ppt;
011import daikon.PptMap;
012import daikon.PptTopLevel;
013import daikon.VarInfo;
014import daikon.inv.Implication;
015import daikon.inv.Invariant;
016import daikon.inv.OutputFormat;
017import gnu.getopt.*;
018import java.io.BufferedWriter;
019import java.io.File;
020import java.io.FileNotFoundException;
021import java.io.IOException;
022import java.io.OutputStreamWriter;
023import java.io.PrintWriter;
024import java.util.ArrayList;
025import java.util.Comparator;
026import java.util.HashMap;
027import java.util.List;
028import java.util.Map;
029import java.util.TreeSet;
030import java.util.logging.Logger;
031import java.util.regex.Matcher;
032import java.util.regex.Pattern;
033import java.util.regex.PatternSyntaxException;
034import org.checkerframework.checker.nullness.qual.KeyFor;
035import org.checkerframework.checker.nullness.qual.Nullable;
036import org.plumelib.util.StringsPlume;
037
038/**
039 * Extract the consequents of all Implication invariants that are predicated by membership in a
040 * cluster, from a {@code .inv} file. An example of such an implication would be "(cluster ==
041 * <em>NUM</em>) &rArr; consequent". The consequent is only true in certain clusters, but is not
042 * generally true for all executions of the program point to which the Implication belongs. These
043 * resulting implications are written to standard output in the format of a splitter info file.
044 */
045public class ExtractConsequent {
046
047  public static final Logger debug = Logger.getLogger("daikon.ExtractConsequent");
048  private static final String lineSep = Global.lineSep;
049
050  private static class HashedConsequent {
051    Invariant inv;
052
053    // This field, `fakeFor`, is a simplified/preferred version of the invariant.
054    // We prefer "x < y", "x > y", and "x == y" to the conditions
055    // "x >= y", "x <= y", and "x != y" that (respectively) give the
056    // same split.  When we see a dispreferred form, we index it by
057    // the preferred form, and if there's already an entry (from the
058    // real preferred one) we throw the new one out. Otherwise, we
059    // insert both the dispreferred form and an entry for the
060    // preferred form, with a pointer pack to the dispreferred
061    // form. If we later see the preferred form, we replace the
062    // placeholder and remove the dispreferred form.
063    final @Nullable String fakeFor;
064
065    HashedConsequent(Invariant inv, @Nullable String fakeFor) {
066      this.inv = inv;
067      this.fakeFor = fakeFor;
068    }
069  }
070
071  /* A HashMap whose keys are PPT names (Strings) and whose values are
072  HashMaps whose keys are predicate names (Strings) and whose values are
073   HashMaps whose keys are Strings (normalized java-format invariants)
074     and whose values are HashedConsequent objects. */
075  private static Map<String, Map<String, Map<String, HashedConsequent>>> pptname_to_conditions =
076      new HashMap<>();
077
078  /** The usage message for this program. */
079  private static String usage =
080      StringsPlume.joinLines(
081          "Usage: java daikon.ExtractConsequent [OPTION]... FILE",
082          "  -h, --" + Daikon.help_SWITCH,
083          "      Display this usage message",
084          "  --" + Daikon.suppress_redundant_SWITCH,
085          "      Suppress display of logically redundant invariants.",
086          "  --" + Daikon.debugAll_SWITCH,
087          "      Turn on all debug switches",
088          "  --" + Daikon.debug_SWITCH + " <logger>",
089          "      Turn on the specified debug logger");
090
091  public static void main(String[] args)
092      throws FileNotFoundException, IOException, ClassNotFoundException {
093    try {
094      mainHelper(args);
095    } catch (Daikon.DaikonTerminationException e) {
096      Daikon.handleDaikonTerminationException(e);
097    }
098  }
099
100  /**
101   * This does the work of {@link #main(String[])}, but it never calls System.exit, so it is
102   * appropriate to be called progrmmatically.
103   *
104   * @param args command-line arguments, like those of {@link #main}
105   * @throws IOException if there is trouble reading the file
106   */
107  public static void mainHelper(final String[] args) throws IOException {
108    daikon.LogHelper.setupLogs(INFO);
109    LongOpt[] longopts =
110        new LongOpt[] {
111          new LongOpt(Daikon.suppress_redundant_SWITCH, LongOpt.NO_ARGUMENT, null, 0),
112          new LongOpt(Daikon.config_option_SWITCH, LongOpt.REQUIRED_ARGUMENT, null, 0),
113          new LongOpt(Daikon.debugAll_SWITCH, LongOpt.NO_ARGUMENT, null, 0),
114          new LongOpt(Daikon.debug_SWITCH, LongOpt.REQUIRED_ARGUMENT, null, 0),
115        };
116    Getopt g = new Getopt("daikon.ExtractConsequent", args, "h", longopts);
117    int c;
118    while ((c = g.getopt()) != -1) {
119      switch (c) {
120        case 0:
121          // got a long option
122          String option_name = longopts[g.getLongind()].getName();
123          if (Daikon.help_SWITCH.equals(option_name)) {
124            System.out.println(usage);
125            throw new Daikon.NormalTermination();
126          } else if (Daikon.suppress_redundant_SWITCH.equals(option_name)) {
127            Daikon.suppress_redundant_invariants_with_simplify = true;
128          } else if (Daikon.config_option_SWITCH.equals(option_name)) {
129            String item = Daikon.getOptarg(g);
130            daikon.config.Configuration.getInstance().apply(item);
131            break;
132          } else if (Daikon.debugAll_SWITCH.equals(option_name)) {
133            Global.debugAll = true;
134          } else if (Daikon.debug_SWITCH.equals(option_name)) {
135            daikon.LogHelper.setLevel(Daikon.getOptarg(g), FINE);
136          } else {
137            throw new RuntimeException("Unknown long option received: " + option_name);
138          }
139          break;
140        case 'h':
141          System.out.println(usage);
142          throw new Daikon.NormalTermination();
143        case '?':
144          break; // getopt() already printed an error
145        default:
146          System.out.println("getopt() returned " + c);
147          break;
148      }
149    }
150    // The index of the first non-option argument -- the name of the file
151    int fileIndex = g.getOptind();
152    if (args.length - fileIndex != 1) {
153      throw new Daikon.UserError("Wrong number of arguments." + Daikon.lineSep + usage);
154    }
155    String filename = args[fileIndex];
156    PptMap ppts =
157        FileIO.read_serialized_pptmap(
158            new File(filename), true // use saved config
159            );
160    extract_consequent(ppts);
161  }
162
163  public static void extract_consequent(PptMap ppts) {
164    // Retrieve Ppt objects in sorted order.
165    // Use a custom comparator for a specific ordering
166    Comparator<PptTopLevel> comparator = new Ppt.NameComparator();
167    TreeSet<PptTopLevel> ppts_sorted = new TreeSet<>(comparator);
168    ppts_sorted.addAll(ppts.asCollection());
169
170    for (PptTopLevel ppt : ppts_sorted) {
171      extract_consequent_maybe(ppt, ppts);
172    }
173
174    PrintWriter pw =
175        new PrintWriter(new BufferedWriter(new OutputStreamWriter(System.out, UTF_8)), true);
176
177    // All conditions at a program point.  A TreeSet to enable
178    // deterministic output.
179    TreeSet<String> allConds = new TreeSet<>();
180    for (String pptname : pptname_to_conditions.keySet()) {
181      Map<String, Map<String, HashedConsequent>> cluster_to_conditions =
182          pptname_to_conditions.get(pptname);
183      for (Map.Entry<@KeyFor("cluster_to_conditions") String, Map<String, HashedConsequent>> entry :
184          cluster_to_conditions.entrySet()) {
185        Map<String, HashedConsequent> conditions = entry.getValue();
186        StringBuilder conjunctionJava = new StringBuilder();
187        StringBuilder conjunctionDaikon = new StringBuilder();
188        StringBuilder conjunctionESC = new StringBuilder();
189        StringBuilder conjunctionSimplify = new StringBuilder("(AND ");
190        int count = 0;
191        for (Map.Entry<@KeyFor("conditions") String, HashedConsequent> entry2 :
192            conditions.entrySet()) {
193          count++;
194          String condIndex = entry2.getKey();
195          HashedConsequent cond = entry2.getValue();
196          if (cond.fakeFor != null) {
197            count--;
198            continue;
199          }
200          String javaStr = cond.inv.format_using(OutputFormat.JAVA);
201          String daikonStr = cond.inv.format_using(OutputFormat.DAIKON);
202          String escStr = cond.inv.format_using(OutputFormat.ESCJAVA);
203          String simplifyStr = cond.inv.format_using(OutputFormat.SIMPLIFY);
204          allConds.add(combineDummy(condIndex, "<dummy> " + daikonStr, escStr, simplifyStr));
205          //           allConds.add(condIndex);
206          if (count > 0) {
207            conjunctionJava.append(" && ");
208            conjunctionDaikon.append(" and ");
209            conjunctionESC.append(" && ");
210            conjunctionSimplify.append(" ");
211          }
212          conjunctionJava.append(javaStr);
213          conjunctionDaikon.append(daikonStr);
214          conjunctionESC.append(escStr);
215          conjunctionSimplify.append(simplifyStr);
216        }
217        conjunctionSimplify.append(")");
218        String conj = conjunctionJava.toString();
219        // Avoid inserting self-contradictory conditions such as "x == 1 &&
220        // x == 2", or conjunctions of only a single condition.
221        if (count < 2
222            || contradict_inv_pattern.matcher(conj).find()
223            || useless_inv_pattern_1.matcher(conj).find()
224            || useless_inv_pattern_2.matcher(conj).find()) {
225          // System.out.println("Suppressing: " + conj);
226        } else {
227          allConds.add(
228              combineDummy(
229                  conjunctionJava.toString(),
230                  conjunctionDaikon.toString(),
231                  conjunctionESC.toString(),
232                  conjunctionSimplify.toString()));
233        }
234      }
235
236      if (allConds.size() > 0) {
237        pw.println();
238        pw.println("PPT_NAME " + pptname);
239        for (String s : allConds) {
240          pw.println(s);
241        }
242      }
243      allConds.clear();
244    }
245
246    pw.flush();
247  }
248
249  static String combineDummy(String inv, String daikonStr, String esc, String simplify) {
250    StringBuilder combined = new StringBuilder(inv);
251    combined.append(lineSep + "\tDAIKON_FORMAT ");
252    combined.append(daikonStr);
253    combined.append(lineSep + "\tESC_FORMAT ");
254    combined.append(esc);
255    combined.append(lineSep + "\tSIMPLIFY_FORMAT ");
256    combined.append(simplify);
257    return combined.toString();
258  }
259
260  /**
261   * Extract consequents from a implications at a single program point. It only searches for
262   * top-level program points because Implications are produced only at those points.
263   */
264  public static void extract_consequent_maybe(PptTopLevel ppt, PptMap all_ppts) {
265    ppt.simplify_variable_names();
266
267    List<Invariant> invs = new ArrayList<>();
268    if (invs.size() > 0) {
269      String pptname = cleanup_pptname(ppt.name());
270      for (Invariant maybe_as_inv : invs) {
271        Implication maybe = (Implication) maybe_as_inv;
272
273        // don't print redundant invariants.
274        if (Daikon.suppress_redundant_invariants_with_simplify
275            && maybe.ppt.parent.redundant_invs.contains(maybe)) {
276          continue;
277        }
278
279        // don't print out invariants with min(), max(), or sum() variables
280        boolean mms = false;
281        VarInfo[] varbls = maybe.ppt.var_infos;
282        for (int v = 0; !mms && v < varbls.length; v++) {
283          mms |= varbls[v].isDerivedSequenceMinMaxSum();
284        }
285        if (mms) {
286          continue;
287        }
288
289        if (maybe.ppt.parent.ppt_name.isExitPoint()) {
290          for (int i = 0; i < maybe.ppt.var_infos.length; i++) {
291            VarInfo vi = maybe.ppt.var_infos[i];
292            if (vi.isDerivedParam()) {
293              continue;
294            }
295          }
296        }
297
298        Invariant consequent = maybe.consequent();
299        Invariant predicate = maybe.predicate();
300        Invariant inv, cluster_inv;
301        boolean cons_uses_cluster = false, pred_uses_cluster = false;
302        // extract the consequent (predicate) if the predicate
303        // (consequent) uses the variable "cluster".  Ignore if they
304        // both depend on "cluster"
305        if (consequent.usesVarDerived("cluster")) {
306          cons_uses_cluster = true;
307        }
308        if (predicate.usesVarDerived("cluster")) {
309          pred_uses_cluster = true;
310        }
311
312        if (!(pred_uses_cluster ^ cons_uses_cluster)) {
313          continue;
314        } else if (pred_uses_cluster) {
315          inv = consequent;
316          cluster_inv = predicate;
317        } else {
318          inv = predicate;
319          cluster_inv = consequent;
320        }
321
322        if (!inv.isWorthPrinting()) {
323          continue;
324        }
325
326        if (contains_constant_non_012(inv)) {
327          continue;
328        }
329
330        // filter out unwanted invariants
331
332        // 1) Invariants involving sequences
333        if (inv instanceof daikon.inv.binary.twoSequence.TwoSequence
334            || inv instanceof daikon.inv.binary.sequenceScalar.SequenceScalar
335            || inv instanceof daikon.inv.binary.sequenceString.SequenceString
336            || inv instanceof daikon.inv.unary.sequence.SingleSequence
337            || inv instanceof daikon.inv.unary.stringsequence.SingleStringSequence) {
338          continue;
339        }
340
341        if (inv instanceof daikon.inv.ternary.threeScalar.LinearTernary
342            || inv instanceof daikon.inv.binary.twoScalar.LinearBinary) {
343          continue;
344        }
345
346        String inv_string = inv.format_using(OutputFormat.JAVA);
347        if (orig_pattern.matcher(inv_string).find()
348            || dot_class_pattern.matcher(inv_string).find()) {
349          continue;
350        }
351        String fake_inv_string = simplify_inequalities(inv_string);
352        HashedConsequent real = new HashedConsequent(inv, null);
353        if (!fake_inv_string.equals(inv_string)) {
354          // For instance, inv_string is "x != y", fake_inv_string is "x == y"
355          HashedConsequent fake = new HashedConsequent(inv, inv_string);
356          boolean added =
357              store_invariant(
358                  cluster_inv.format_using(OutputFormat.JAVA), fake_inv_string, fake, pptname);
359          if (!added) {
360            // We couldn't add "x == y", (when we're "x != y") because
361            // it already exists; so don't add "x == y" either.
362            continue;
363          }
364        }
365        store_invariant(cluster_inv.format_using(OutputFormat.JAVA), inv_string, real, pptname);
366      }
367    }
368  }
369
370  // Store the invariant for later printing. Ignore duplicate
371  // invariants at the same program point.
372  private static boolean store_invariant(
373      String predicate, String index, HashedConsequent consequent, String pptname) {
374    if (!pptname_to_conditions.containsKey(pptname)) {
375      pptname_to_conditions.put(pptname, new HashMap<>());
376    }
377
378    Map<String, Map<String, HashedConsequent>> cluster_to_conditions =
379        pptname_to_conditions.get(pptname);
380    if (!cluster_to_conditions.containsKey(predicate)) {
381      cluster_to_conditions.put(predicate, new HashMap<>());
382    }
383
384    Map<String, HashedConsequent> conditions = cluster_to_conditions.get(predicate);
385    if (conditions.containsKey(index)) {
386      HashedConsequent old = conditions.get(index);
387      if (old.fakeFor != null && consequent.fakeFor == null) {
388        // We already saw (say) "x != y", but we're "x == y", so replace it.
389        conditions.remove(index);
390        conditions.remove(old.fakeFor);
391        conditions.put(index, consequent);
392        return true;
393      }
394      return false;
395    } else {
396      conditions.put(index, consequent);
397      return true;
398    }
399  }
400
401  private static boolean contains_constant_non_012(Invariant inv) {
402    if (inv instanceof daikon.inv.unary.scalar.OneOfScalar) {
403      daikon.inv.unary.scalar.OneOfScalar oneof = (daikon.inv.unary.scalar.OneOfScalar) inv;
404      // TODO: isInteresting has been removed.  Do we need to deal with it specially here?
405      // OneOf invariants that indicate a small set ( > 1 element) of
406      // possible values are not interesting, and have already been
407      // eliminated by the isInteresting check
408      long num = ((Long) oneof.elt()).longValue();
409      if (num > 2 || num < -1) {
410        return true;
411      }
412    }
413
414    return false;
415  }
416
417  // remove non-word characters and everything after ":::" from the
418  // program point name, leaving PackageName.ClassName.MethodName
419  private static String cleanup_pptname(String pptname) {
420    int index;
421    if ((index = pptname.indexOf("(")) > 0) {
422      pptname = pptname.substring(0, index);
423    }
424
425    if (pptname.endsWith(".")) {
426      pptname = pptname.substring(0, pptname.length() - 2);
427    }
428
429    Matcher m = non_word_pattern.matcher(pptname);
430    return m.replaceAll(".");
431  }
432
433  /**
434   * Prevents the occurence of "equivalent" inequalities, or inequalities which produce the same
435   * pair of splits at a program point, for example "x &le; y" and "x &gt; y". Replaces "&ge;" with
436   * "&lt;", "&le;" with "&gt;", and "!=" with "==" so that the occurence of equivalent inequalities
437   * can be detected. However it tries not to be smart ... If there is more than one inequality in
438   * the expression, it doesn't perform a substitution.
439   *
440   * @param condition a boolean equation
441   * @return the condition, with some equalities canonicalized
442   */
443  private static String simplify_inequalities(String condition) {
444    if (contains_exactly_one(condition, inequality_pattern)) {
445      if (gteq_pattern.matcher(condition).find()) {
446        condition = gteq_pattern.matcher(condition).replaceFirst("<");
447      } else if (lteq_pattern.matcher(condition).find()) {
448        condition = lteq_pattern.matcher(condition).replaceFirst(">");
449      } else if (neq_pattern.matcher(condition).find()) {
450        condition = neq_pattern.matcher(condition).replaceFirst("==");
451      } else {
452        throw new Error("this can't happen");
453      }
454    }
455    return condition;
456  }
457
458  private static boolean contains_exactly_one(String string, Pattern pattern) {
459    Matcher m = pattern.matcher(string);
460    // return true if first call returns true and second returns false
461    return m.find() && !m.find();
462  }
463
464  static Pattern orig_pattern, dot_class_pattern, non_word_pattern;
465  static Pattern gteq_pattern, lteq_pattern, neq_pattern, inequality_pattern;
466  static Pattern contradict_inv_pattern, useless_inv_pattern_1, useless_inv_pattern_2;
467
468  static {
469    try {
470      non_word_pattern = Pattern.compile("\\W+");
471      orig_pattern = Pattern.compile("orig\\s*\\(");
472      dot_class_pattern = Pattern.compile("\\.class");
473      inequality_pattern = Pattern.compile("[\\!<>]=");
474      gteq_pattern = Pattern.compile(">=");
475      lteq_pattern = Pattern.compile("<=");
476      neq_pattern = Pattern.compile("\\!=");
477      contradict_inv_pattern =
478          Pattern.compile("(^| && )(.*) == -?[0-9]+ &.*& \\2 == -?[0-9]+($| && )");
479      useless_inv_pattern_1 =
480          Pattern.compile("(^| && )(.*) > -?[0-9]+ &.*& \\2 > -?[0-9]+($| && )");
481      useless_inv_pattern_2 =
482          Pattern.compile("(^| && )(.*) < -?[0-9]+ &.*& \\2 < -?[0-9]+($| && )");
483    } catch (PatternSyntaxException me) {
484      throw new Error("ExtractConsequent: Error while compiling pattern " + me.getMessage());
485    }
486  }
487}