123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588 |
- #!/usr/bin/env python3
- # pylint: disable=invalid-name
- """
- Convert a log to CheckStyle format.
- Url: https://github.com/mdeweerd/LogToCheckStyle
- The log can then be used for generating annotations in a github action.
- Note: this script is very young and "quick and dirty".
- Patterns can be added to "PATTERNS" to match more messages.
- # Examples
- Assumes that logToCs.py is available as .github/logToCs.py.
- ## Example 1:
- ```yaml
- - run: |
- pre-commit run --all-files | tee pre-commit.log
- .github/logToCs.py pre-commit.log pre-commit.xml
- - uses: staabm/annotate-pull-request-from-checkstyle-action@v1
- with:
- files: pre-commit.xml
- notices-as-warnings: true # optional
- ```
- ## Example 2:
- ```yaml
- - run: |
- pre-commit run --all-files | tee pre-commit.log
- - name: Add results to PR
- if: ${{ always() }}
- run: |
- .github/logToCs.py pre-commit.log | cs2pr
- ```
- Author(s):
- - https://github.com/mdeweerd
- License: MIT License
- """
- import argparse
- import datetime as dt
- import json
- import os
- import re
- import sys
- import xml.etree.ElementTree as ET # nosec
def remove_prefix(string, prefix):
    """
    Return ``string`` without its leading ``prefix``.

    The string is returned unchanged when ``prefix`` is empty/``None`` or
    when ``string`` does not start with it.
    Provided for backward compatibility.
    """
    if not prefix or not string.startswith(prefix):
        return string
    prefix_length = len(prefix)
    return string[prefix_length:]
def convert_notices_to_checkstyle(notices, root_path=None):
    """
    Build the CheckStyle XML document for a list of notices.

    :param notices: iterable of dicts; each dict is expanded into the
                    keyword arguments of ``add_error_entry``.
    :param root_path: forwarded to ``add_error_entry`` for every notice.
    :return: the XML document as a ``str``.
    """
    checkstyle = ET.Element("checkstyle")
    for notice in notices:
        add_error_entry(checkstyle, root_path=root_path, **notice)
    xml_bytes = ET.tostring(checkstyle, encoding="utf_8")
    return xml_bytes.decode("utf_8")
def convert_lines_to_notices(lines):
    """
    Parse each line with ``parse_message`` and collect the notices.

    Lines for which ``parse_message`` returns a falsy value are dropped.

    :param lines: iterable of single log lines.
    :return: list of field dicts, in input order.
    """
    parsed = (parse_message(line) for line in lines)
    return [fields for fields in parsed if fields]
def convert_text_to_notices(text):
    """
    Parse a complete log text into notices.

    Thin wrapper that delegates to ``parse_file`` and returns its result.
    """
    notices = parse_file(text)
    return notices
- # Initial version for Checkrun from:
- # https://github.com/tayfun/flake8-your-pr/blob/50a175cde4dd26a656734c5b64ba1e5bb27151cb/src/main.py#L7C1-L123C36
- # MIT Licence
class CheckRun:
    """
    Represents the check run.

    Reads the GitHub event file referenced by ``GITHUB_EVENT_PATH`` and
    posts a completed check run to
    ``{URI}/repos/{repository full name}/check-runs``.
    """

    GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN", None)
    GITHUB_EVENT_PATH = os.environ.get("GITHUB_EVENT_PATH", None)

    URI = "https://api.github.com"
    API_VERSION = "2022-11-28"
    ACCEPT_HEADER_VALUE = "application/vnd.github+json"
    AUTH_HEADER_VALUE = f"Bearer {GITHUB_TOKEN}"
    # This is the max annotations Github API accepts in one go.
    MAX_ANNOTATIONS = 50

    def __init__(self):
        """
        Initialise Check Run object with information from checkrun

        :raises ValueError: when ``GITHUB_EVENT_PATH`` is not set.
        """
        self.read_event_file()
        self.read_meta_data()

    def read_event_file(self):
        """
        Read the event file to get the event information later.

        Stores the decoded JSON document in ``self.event``.

        :raises ValueError: when ``GITHUB_EVENT_PATH`` is not set.
        """
        if self.GITHUB_EVENT_PATH is None:
            raise ValueError("Not running in github workflow")
        with open(self.GITHUB_EVENT_PATH, encoding="utf_8") as event_file:
            self.event = json.load(event_file)

    def read_meta_data(self):
        """
        Get meta data from event information

        Sets ``self.repo_full_name`` and ``self.head_sha``.  ``head_sha`` is
        ``None`` when the event holds neither a pull request nor a check
        suite that lists at least one pull request.
        """
        self.repo_full_name = self.event["repository"]["full_name"]

        # Diagnostic dump of the event.  It goes to stderr so that it can
        # not end up in a CheckStyle report written to stdout.
        print(f"{self.event!r}", file=sys.stderr)

        pull_request = self.event.get("pull_request")
        if pull_request:
            self.head_sha = pull_request["head"]["sha"]
            return

        check_suite = self.event.get("check_suite", None) or {}
        pull_requests = check_suite.get("pull_requests") or []
        if pull_requests:
            # NOTE(review): this uses the *base* sha of the first pull
            # request, as the original code did - confirm that the head
            # sha is not what is wanted here.
            self.head_sha = pull_requests[0]["base"]["sha"]
        else:
            self.head_sha = None  # Can't annotate?

    def submit(  # pylint: disable=too-many-arguments
        self,
        notices,
        title=None,
        summary=None,
        text=None,
        conclusion=None,
    ):
        """
        Submit annotations to github

        See:
        https://docs.github.com/en/rest/checks/runs?apiVersion=2022-11-28
        #update-a-check-run

        Does nothing when ``self.head_sha`` is ``None``.  At most
        ``MAX_ANNOTATIONS`` notices are sent.

        :param notices: list of annotations to attach to the check run.
        :param title: optional title of the check run output.
        :param summary: optional summary of the check run output.
        :param text: optional details of the check run output.
        :param conclusion: success, failure; when ``None`` it is
                           "failure" if there are notices, else "success".
        :raises requests.HTTPError: when the API answers with an error.
        """
        if self.head_sha is None:
            # Nothing to annotate: return before importing 'requests' so
            # that the module is only needed when a request is made.
            return

        # pylint: disable=import-outside-toplevel
        import requests  # Import here to not impose presence of module

        output = {
            "annotations": notices[: CheckRun.MAX_ANNOTATIONS],
        }
        optional_fields = (
            ("title", title),
            ("summary", summary),
            ("text", text),
        )
        for key, value in optional_fields:
            if value is not None:
                output[key] = value

        if conclusion is None:
            # action_required, cancelled, failure, neutral, success
            # skipped, stale, timed_out
            conclusion = "failure" if notices else "success"

        payload = {
            "name": "log-to-pr-annotation",
            "head_sha": self.head_sha,
            "status": "completed",  # queued, in_progress, completed
            "conclusion": conclusion,
            # "started_at": dt.datetime.now(dt.timezone.utc).isoformat(),
            "completed_at": dt.datetime.now(dt.timezone.utc).isoformat(),
            "output": output,
        }

        # Create the check-run
        response = requests.post(
            f"{self.URI}/repos/{self.repo_full_name}/check-runs",
            headers={
                "Accept": self.ACCEPT_HEADER_VALUE,
                "Authorization": self.AUTH_HEADER_VALUE,
                "X-GitHub-Api-Version": self.API_VERSION,
            },
            json=payload,
            timeout=30,
        )
        # Diagnostic output, kept away from stdout (see read_meta_data).
        print(response.content, file=sys.stderr)
        response.raise_for_status()
# Building blocks for the message patterns.  Most of them end with a lazy
# "\s*?" so that optional white space before the next separator is accepted.
ANY_REGEX = r".*?"
FILE_REGEX = r"\s*(?P<file_name>\S.*?)\s*?"
FILEGROUP_REGEX = r"\s*(?P<file_group>\S.*?)\s*?"
EOL_REGEX = r"[\r\n]"
LINE_REGEX = r"\s*(?P<line>\d+?)\s*?"
COLUMN_REGEX = r"\s*(?P<column>\d+?)\s*?"
SEVERITY_REGEX = r"\s*(?P<severity>error|warning|notice|style|info)\s*?"
MSG_REGEX = r"\s*(?P<message>.+?)\s*?"
MULTILINE_MSG_REGEX = r"\s*(?P<message>(?:.|.[\r\n])+)"
# cpplint confidence index
CONFIDENCE_REGEX = r"\s*\[(?P<confidence>\d+)\]\s*?"

# List of message patterns, add more specific patterns earlier in the list
# Creating patterns by using constants makes them easier to define and read.
PATTERNS = [
    # beautysh
    #  File ftp.sh: error: "esac" before "case" in line 90.
    # The final period is escaped: an unescaped "." also matches the last
    # digit of the line number when the period is missing.
    re.compile(
        rf"^File {FILE_REGEX}:{SEVERITY_REGEX}:"
        rf" {MSG_REGEX} in line {LINE_REGEX}\.$"
    ),
    # beautysh
    #  File socks4echo.sh: error: indent/outdent mismatch: -2.
    re.compile(rf"^File {FILE_REGEX}:{SEVERITY_REGEX}: {MSG_REGEX}$"),
    # yamllint
    #  ##[group].pre-commit-config.yaml
    #  ##[error]97:14 [trailing-spaces] trailing spaces
    #  ##[endgroup]
    re.compile(rf"^##\[group\]{FILEGROUP_REGEX}$"),  # Start file group
    re.compile(
        rf"^##\[{SEVERITY_REGEX}\]{LINE_REGEX}:{COLUMN_REGEX}{MSG_REGEX}$"
    ),  # Msg
    re.compile(r"^##(?P<file_endgroup>\[endgroup\])$"),  # End file group
    # Emacs style
    #  path/to/file:845:5: error - Expected 1 space after closing brace
    re.compile(
        rf"^{FILE_REGEX}:{LINE_REGEX}:{COLUMN_REGEX}:{SEVERITY_REGEX}"
        rf"-?\s{MSG_REGEX}$"
    ),
    # ESLint (JavaScript Linter), RoboCop, shellcheck
    #  path/to/file.js:10:2: Some linting issue
    #  path/to/file.rb:10:5: Style/Indentation: Incorrect indentation detected
    #  path/to/script.sh:10:1: SC2034: Some shell script issue
    re.compile(rf"^{FILE_REGEX}:{LINE_REGEX}:{COLUMN_REGEX}: {MSG_REGEX}$"),
    # Cpplint default output:
    #           '%s:%s:  %s  [%s] [%d]\n'
    #   % (filename, linenum, message, category, confidence)
    re.compile(rf"^{FILE_REGEX}:{LINE_REGEX}:{MSG_REGEX}{CONFIDENCE_REGEX}$"),
    # MSVC
    #  file.cpp(10): error C1234: Some error message
    re.compile(
        rf"^{FILE_REGEX}\({LINE_REGEX}\):{SEVERITY_REGEX}{MSG_REGEX}$"
    ),
    # Java compiler
    #  File.java:10: error: Some error message
    re.compile(rf"^{FILE_REGEX}:{LINE_REGEX}:{SEVERITY_REGEX}:{MSG_REGEX}$"),
    # Python
    #  File ".../logToCs.py", line 90 (note: code line follows)
    re.compile(rf'^File "{FILE_REGEX}", line {LINE_REGEX}$'),
    # Pylint, others
    #  path/to/file.py:10: [C0111] Missing docstring
    # others
    re.compile(rf"^{FILE_REGEX}:{LINE_REGEX}: {MSG_REGEX}$"),
    # Shellcheck:
    #  In script.sh line 76:
    re.compile(
        rf"^In {FILE_REGEX} line {LINE_REGEX}:{EOL_REGEX}?"
        rf"({MULTILINE_MSG_REGEX})?{EOL_REGEX}{EOL_REGEX}"
    ),
    # eslint:
    #  /path/to/filename
    #    14:5  error  Unexpected trailing comma  comma-dangle
    re.compile(
        rf"^{FILE_REGEX}{EOL_REGEX}"
        rf"\s+{LINE_REGEX}:{COLUMN_REGEX}\s+{SEVERITY_REGEX}\s+{MSG_REGEX}$"
    ),
]
# Some regexes occasionally match a message that is not an error.
# A message matching this pattern is treated as such an exception.
# "Placeholder pattern" is to be removed once a first real message
# pattern is added.
EXCLUDE_MSG_PATTERN = re.compile(r"^(Placeholder pattern)")

# Some regexes occasionally match a "file name" that is not a file.
# A file name matching this pattern is treated as such an exception.
# Alternatives:
#   - Codespell: "Used config files" (appears as a file name)
EXCLUDE_FILE_PATTERN = re.compile(r"^(Used config files\b)")

# Severities available in CodeSniffer report format
SEVERITY_NOTICE = "notice"
SEVERITY_WARNING = "warning"
SEVERITY_ERROR = "error"
def strip_ansi(text: str):
    """
    Return ``text`` with the ANSI escape sequences (colors, etc) removed.
    """
    ansi_escape = re.compile(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])")
    return ansi_escape.sub("", text)
def parse_file(text):
    """
    Find every message in a complete log text.

    All ``PATTERNS`` are joined into one alternation and searched in the
    ANSI-stripped text with the third-party ``regex`` module.

    :param text: the complete log as one string.
    :return: list of dicts holding the named groups of each retained
             match, with ``severity`` normalised.
    :raises ImportError: when the ``regex`` module is not installed.
    """
    # pylint: disable=too-many-branches
    # regex required to allow same group names
    try:
        import regex  # pylint: disable=import-outside-toplevel
    except ImportError as exc:
        raise ImportError(
            "The 'parsefile' method requires 'python -m pip install regex'"
        ) from exc

    alternatives = ")|(?:".join(compiled.pattern for compiled in PATTERNS)
    combined = f"(?:(?:{alternatives}))"
    flags = regex.MULTILINE | regex.IGNORECASE

    current_group = None  # The file name for the group (if any)
    notices = []

    for match in regex.finditer(combined, strip_ansi(text), flags):
        entry = match.groupdict()
        if not entry:
            continue

        # These helper groups never appear in the returned dict.
        confidence = entry.pop("confidence", None)
        group_start = entry.pop("file_group", None)
        group_end = entry.pop("file_endgroup", None)

        if group_start is not None:
            # Start of a file group: remember its file, nothing to report
            current_group = group_start
            continue
        if group_end is not None:
            current_group = None
            continue

        file_name = entry.get("file_name", None)
        if file_name is None:
            if current_group is None:
                # No filename, skip
                continue
            entry["file_name"] = current_group
        elif EXCLUDE_FILE_PATTERN.search(file_name):
            # This file_name is excluded
            continue

        message = entry.get("message", None)
        if message is not None and EXCLUDE_MSG_PATTERN.search(message):
            # This message is excluded
            continue

        if confidence is not None:
            # Convert confidence level of cpplint to warning, etc.
            level = int(confidence)
            if level <= 1:
                severity = SEVERITY_NOTICE
            elif level >= 5:
                severity = SEVERITY_ERROR
            else:
                severity = SEVERITY_WARNING
        else:
            severity = entry.get("severity", None)
            if severity is None:
                severity = SEVERITY_ERROR
            else:
                severity = severity.lower()
                if severity in ("info", "style"):
                    severity = SEVERITY_NOTICE

        entry["severity"] = severity
        notices.append(entry)

    return notices
def parse_message(message):
    """
    Parse message until it matches a pattern.

    The patterns of ``PATTERNS`` are tried in order, ignoring case.  The
    first pattern that matches with at least one named group decides the
    outcome.

    :param message: a single log line.
    :return: dict with the matched fields and a normalised ``severity``,
             or ``None`` when nothing matched, when the match carries no
             file name (e.g. a file group marker), or when the file name
             or message is excluded by ``EXCLUDE_FILE_PATTERN`` /
             ``EXCLUDE_MSG_PATTERN``.
    """
    for pattern in PATTERNS:
        # The second positional argument of a compiled pattern's match()
        # is the start position, not a flags value, so the flag has to be
        # applied through the module level function.
        fields = re.match(
            pattern.pattern, message, pattern.flags | re.IGNORECASE
        )
        if not fields:
            continue

        result = fields.groupdict()
        if not result:
            continue

        # A single line can not be attached to a file group, so matches
        # without a file name of their own can not become a notice.
        result.pop("file_group", None)
        result.pop("file_endgroup", None)
        file_name = result.get("file_name")
        if file_name is None or EXCLUDE_FILE_PATTERN.search(file_name):
            return None

        text = result.get("message")
        if text is not None and EXCLUDE_MSG_PATTERN.search(text):
            return None

        confidence = result.pop("confidence", None)
        if confidence is not None:
            # Convert confidence level of cpplint
            # to warning, etc.
            level = int(confidence)
            if level <= 1:
                result["severity"] = SEVERITY_NOTICE
            elif level >= 5:
                result["severity"] = SEVERITY_ERROR
            else:
                result["severity"] = SEVERITY_WARNING

        severity = result.get("severity")
        if severity is None:
            severity = SEVERITY_ERROR
        else:
            severity = severity.lower()
            if severity in ("info", "style"):
                severity = SEVERITY_NOTICE
        result["severity"] = severity

        return result

    # Nothing matched
    return None
def add_error_entry(  # pylint: disable=too-many-arguments
    root,
    severity,
    file_name,
    line=None,
    column=None,
    message=None,
    source=None,
    root_path=None,
):
    """
    Append one ``error`` element to the CheckStyle tree under ``root``.

    The element is added to the ``file`` element obtained from
    ``find_or_create_file_element``.  ``severity`` is always set; the
    other attributes are only set when their value is truthy.
    """
    parent = find_or_create_file_element(root, file_name, root_path=root_path)
    entry = ET.SubElement(parent, "error")
    entry.set("severity", severity)

    optional_attributes = (
        ("line", line),
        ("column", column),
        ("message", message),
        # To verify if this is a valid attribute
        ("source", source),
    )
    for attribute, value in optional_attributes:
        if value:
            entry.set(attribute, value)
def find_or_create_file_element(root, file_name: str, root_path=None):
    """
    Return the ``file`` child of ``root`` for ``file_name``.

    When ``root_path`` is not ``None`` it is first removed from the start
    of ``file_name``.  An existing ``file`` element with that name is
    reused; otherwise a new one is appended to ``root``.
    """
    if root_path is None:
        name = file_name
    else:
        name = remove_prefix(file_name, root_path)

    candidates = (
        child for child in root.findall("file") if child.get("name") == name
    )
    # Compare with None: an element without children is falsy.
    found = next(candidates, None)
    if found is not None:
        return found

    created = ET.SubElement(root, "file")
    created.set("name", name)
    return created
def _read_text(path):
    """Return the content of ``path``, or of stdin when ``path`` is '-'."""
    if path == "-":
        return sys.stdin.read()
    with open(path, encoding="utf_8", errors="surrogateescape") as input_file:
        return input_file.read()


def _write_text(path, text):
    """Write ``text`` to ``path``, or print it when ``path`` is '-'."""
    if path == "-":
        print(text)
        return
    with open(path, "w", encoding="utf_8") as output_file:
        output_file.write(text)


def main():
    """
    Parse the script arguments and get the conversion done.

    Reads the log, converts it to notices, writes the CheckStyle XML and,
    when requested with ``--github-annotate``, submits the notices as a
    check run.
    """
    parser = argparse.ArgumentParser(
        description="Convert messages to Checkstyle XML format."
    )
    parser.add_argument(
        "input", help="Input file. Use '-' for stdin.", nargs="?", default="-"
    )
    parser.add_argument(
        "output",
        help="Output file. Use '-' for stdout.",
        nargs="?",
        default="-",
    )
    parser.add_argument(
        "-i",
        "--in",
        dest="input_named",
        help="Input filename. Overrides positional input.",
    )
    parser.add_argument(
        "-o",
        "--out",
        dest="output_named",
        help="Output filename. Overrides positional output.",
    )
    parser.add_argument(
        "--root",
        metavar="ROOT_PATH",
        help="Root directory to remove from file paths."
        " Defaults to working directory.",
        default=os.getcwd(),
    )
    parser.add_argument(
        "--github-annotate",
        action=argparse.BooleanOptionalAction,
        help="Annotate when in Github workflow.",
        # Currently disabled,
        #  Future: (os.environ.get("GITHUB_EVENT_PATH", None) is not None),
        default=False,
    )

    args = parser.parse_args()

    # The named options override the positional arguments, as their help
    # text states.
    input_path = args.input_named or args.input
    output_path = args.output_named or args.output

    text = _read_text(input_path)

    # Make sure that the root ends with a path separator
    root_path = os.path.join(args.root, "")

    try:
        notices = convert_text_to_notices(text)
    except ImportError:
        # 'regex' is not available: fall back to line by line parsing.
        # ANSI sequences are removed here as parse_file does it itself.
        notices = convert_lines_to_notices(
            re.split(r"[\r\n]+", strip_ansi(text))
        )

    checkstyle_xml = convert_notices_to_checkstyle(
        notices, root_path=root_path
    )

    _write_text(output_path, checkstyle_xml)

    if args.github_annotate:
        checkrun = CheckRun()
        checkrun.submit(notices)


if __name__ == "__main__":
    main()
|