#!/usr/bin/env python3

"""Given compiler output, sort the errors/warnings by filename.

Reads from standard input, writes the sorted errors to standard out.
Works for any tool that produces output in the standard format
(https://www.gnu.org/prep/standards/html_node/Errors.html).
This script is useful for compilers such as javac that process files in
nondeterministic order.
"""

## TODO: Maybe use the comparison from the `sort-directory-order` script.

## TODO: Need to sort *numerically*, not *lexicographically*, for line and column numbers.

import re
import sys
from itertools import groupby
from operator import itemgetter
from pathlib import Path

## TODO: permit reading from stdin *or* a file on the command line.
## https://stackoverflow.com/questions/7576525/
# This script is a pure stdin-to-stdout filter, so any command-line argument is
# a usage error.  The complaint is written to standard error so that it cannot
# be mistaken for (or piped onward as) sorted compiler output.
if len(sys.argv) != 1:
    print(
        f"{Path(__file__).name}: Do not pass arguments, use stdin and stdout.",
        file=sys.stderr,
    )
    sys.exit(1)

# Both patterns below recognize the first line of a diagnostic:
#   * an optional "[ERROR] " or "[WARNING] " prefix, followed by either
#   * the literal "-> [Help", or
#   * FILE, a colon, a line number, an optional column number (separated from
#     the line number by ":" or ","), with optional square brackets around the
#     numbers, an optional trailing colon, and then a space.  This covers
#     "FILE:LINE: msg", "FILE:LINE:COL: msg", and "FILE:[LINE,COL] msg".
#
# FILE may not start with a space, so indented continuation lines never start
# a new diagnostic.  FILE is matched lazily so that it ends at the first colon
# that introduces a line number; a greedy match would absorb the ":LINE" of
# "FILE:LINE:COL" into the file name.
#
# The two patterns must be kept in sync.  The "without groups" variant is
# wrapped in exactly one capturing group so that splitting on it also returns
# the matched text; it must not contain any other capturing group.
error_start_without_groups_string = (
    r"("
    r"^(?:\[(?:ERROR|WARNING)\] )?"
    r"(?:-> \[Help|"
    r"[^ ].*?:\[?[0-9]+(?:[:,][0-9]+)?\]?:? "
    r")"
    r")"
)
# Same language as above, but with named groups for the parts used in sorting:
# "filename", "lineno", "columnno", and "most" (everything after the optional
# "[ERROR] "/"[WARNING] " prefix).
error_start_with_groups_string = (
    r"^(?:\[(?:ERROR|WARNING)\] )?"
    r"(?P<most>-> \[Help|"
    r"(?P<filename>[^ ].*?):\[?(?P<lineno>[0-9]+)(?:[:,](?P<columnno>[0-9]+))?\]?:? "
    r")"
)
# MULTILINE lets "^" match at the start of every input line, not just the first.
error_start_without_groups_re = re.compile(error_start_without_groups_string, re.MULTILINE)
# Applied to one diagnostic at a time, so "^" only needs to match at its start.
error_start_with_groups_re = re.compile(error_start_with_groups_string)

# Read everything up front: a diagnostic may span several lines, so the input
# is split by pattern rather than processed line by line.
all_input = sys.stdin.read()

# The split pattern is wrapped in a capturing group, so the result alternates:
# [text before the first diagnostic, start1, rest1, start2, rest2, ...].
prefix, *chunks = error_start_without_groups_re.split(all_input)

if False:
    for piece in chunks:
        print("chunk:")
        print(piece, end="")
        print("endchunk.")

# Glue each matched start back onto the text that followed it, giving one
# string per diagnostic.  strict=True raises if the pieces do not pair up.
errors = [start + rest for start, rest in zip(chunks[0::2], chunks[1::2], strict=True)]


def error_sort_key(error: str) -> str:
    """Given an error, return its sort key.

    The key is the file name, then the line number and the column number
    (each zero-padded to a fixed width so that comparing the keys as strings
    orders the numbers numerically), then the text matched by the "most"
    group of ``error_start_with_groups_re``.  A part that is absent from the
    error (for example, the column number) contributes only zero padding.

    Args:
        error: an error message

    Returns:
        the sort key of the message; the message itself if it does not begin
        with a recognized error start
    """
    match = error_start_with_groups_re.match(error)
    if match is None:
        return error
    # Groups that did not participate in the match are reported as "".
    fields = match.groupdict("")
    # Wide enough for any realistic line or column number.  Narrower padding
    # makes longer numbers compare lexicographically (e.g., column 1000 would
    # sort before column 999 with a width of 3).
    number_width = 10
    return (
        fields["filename"]
        + fields["lineno"].zfill(number_width)
        + fields["columnno"].zfill(number_width)
        + fields["most"]
    )


if False:
    for error in errors:
        print("sortkey: ", error_sort_key(error))
        print("error:")
        print(error, end="")
        print("enderror.")


# list.sort() is stable, so errors with equal sort keys keep their input order.
errors.sort(key=error_sort_key)

if False:
    print(prefix)
    for error in errors:
        print("sortederror:")
        print(error, end="")
        print("endsortederror.")

# Remove repeated errors, keeping the first occurrence of each and preserving
# the sorted order.  Identical errors have equal sort keys but are not
# necessarily adjacent after the stable sort: a different message with the same
# key can sit between them.  Collapsing only adjacent duplicates would
# therefore leave some repeats in the output.
errors = list(dict.fromkeys(errors))


# NOTE(review): print() appends a newline after the prefix, so the output has
# one more newline here than the input did -- confirm that this is intended.
print(prefix)
for error in errors:
    print(error, end="")
