logToCs.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588
  1. #!/usr/bin/env python3
  2. # pylint: disable=invalid-name
  3. """
  4. Convert a log to CheckStyle format.
  5. Url: https://github.com/mdeweerd/LogToCheckStyle
  6. The log can then be used for generating annotations in a github action.
  7. Note: this script is very young and "quick and dirty".
  8. Patterns can be added to "PATTERNS" to match more messages.
  9. # Examples
  10. Assumes that logToCs.py is available as .github/logToCs.py.
  11. ## Example 1:
  12. ```yaml
  13. - run: |
  14. pre-commit run --all-files | tee pre-commit.log
  15. .github/logToCs.py pre-commit.log pre-commit.xml
  16. - uses: staabm/annotate-pull-request-from-checkstyle-action@v1
  17. with:
  18. files: pre-commit.xml
  19. notices-as-warnings: true # optional
  20. ```
  21. ## Example 2:
  22. ```yaml
  23. - run: |
  24. pre-commit run --all-files | tee pre-commit.log
  25. - name: Add results to PR
  26. if: ${{ always() }}
  27. run: |
  28. .github/logToCs.py pre-commit.log | cs2pr
  29. ```
  30. Author(s):
  31. - https://github.com/mdeweerd
  32. License: MIT License
  33. """
  34. import argparse
  35. import datetime as dt
  36. import json
  37. import os
  38. import re
  39. import sys
  40. import xml.etree.ElementTree as ET # nosec
  41. def remove_prefix(string, prefix):
  42. """
  43. Remove prefix from string
  44. Provided for backward compatibility.
  45. """
  46. if prefix and string.startswith(prefix):
  47. return string[len(prefix) :]
  48. return string
  49. def convert_notices_to_checkstyle(notices, root_path=None):
  50. """
  51. Convert annotation list to CheckStyle xml string
  52. """
  53. root = ET.Element("checkstyle")
  54. for fields in notices:
  55. add_error_entry(root, **fields, root_path=root_path)
  56. return ET.tostring(root, encoding="utf_8").decode("utf_8")
  57. def convert_lines_to_notices(lines):
  58. """
  59. Convert provided message to CheckStyle format.
  60. """
  61. notices = []
  62. for line in lines:
  63. fields = parse_message(line)
  64. if fields:
  65. notices.append(fields)
  66. return notices
  67. def convert_text_to_notices(text):
  68. """
  69. Convert provided message to CheckStyle format.
  70. """
  71. return parse_file(text)
  72. # Initial version for Checkrun from:
  73. # https://github.com/tayfun/flake8-your-pr/blob/50a175cde4dd26a656734c5b64ba1e5bb27151cb/src/main.py#L7C1-L123C36
  74. # MIT Licence
  75. class CheckRun:
  76. """
  77. Represents the check run
  78. """
  79. GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN", None)
  80. GITHUB_EVENT_PATH = os.environ.get("GITHUB_EVENT_PATH", None)
  81. URI = "https://api.github.com"
  82. API_VERSION = "2022-11-28"
  83. ACCEPT_HEADER_VALUE = "application/vnd.github+json"
  84. AUTH_HEADER_VALUE = f"Bearer {GITHUB_TOKEN}"
  85. # This is the max annotations Github API accepts in one go.
  86. MAX_ANNOTATIONS = 50
  87. def __init__(self):
  88. """
  89. Initialise Check Run object with information from checkrun
  90. """
  91. self.read_event_file()
  92. self.read_meta_data()
  93. def read_event_file(self):
  94. """
  95. Read the event file to get the event information later.
  96. """
  97. if self.GITHUB_EVENT_PATH is None:
  98. raise ValueError("Not running in github workflow")
  99. with open(self.GITHUB_EVENT_PATH, encoding="utf_8") as event_file:
  100. self.event = json.loads(event_file.read())
  101. def read_meta_data(self):
  102. """
  103. Get meta data from event information
  104. """
  105. self.repo_full_name = self.event["repository"]["full_name"]
  106. pull_request = self.event.get("pull_request")
  107. print("%r", self.event)
  108. if pull_request:
  109. self.head_sha = pull_request["head"]["sha"]
  110. else:
  111. print("%r", self.event)
  112. check_suite = self.event.get("check_suite", None)
  113. if check_suite is not None:
  114. self.head_sha = check_suite["pull_requests"][0]["base"]["sha"]
  115. else:
  116. self.head_sha = None # Can't annotate?
  117. def submit( # pylint: disable=too-many-arguments
  118. self,
  119. notices,
  120. title=None,
  121. summary=None,
  122. text=None,
  123. conclusion=None,
  124. ):
  125. """
  126. Submit annotations to github
  127. See:
  128. https://docs.github.com/en/rest/checks/runs?apiVersion=2022-11-28
  129. #update-a-check-run
  130. :param conclusion: success, failure
  131. """
  132. # pylint: disable=import-outside-toplevel
  133. import requests # Import here to not impose presence of module
  134. if self.head_sha is None:
  135. return
  136. output = {
  137. "annotations": notices[: CheckRun.MAX_ANNOTATIONS],
  138. }
  139. if title is not None:
  140. output["title"] = title
  141. if summary is not None:
  142. output["summary"] = summary
  143. if text is not None:
  144. output["text"] = text
  145. if conclusion is None:
  146. # action_required, cancelled, failure, neutral, success
  147. # skipped, stale, timed_out
  148. if bool(notices):
  149. conclusion = "failure"
  150. else:
  151. conclusion = "success"
  152. payload = {
  153. "name": "log-to-pr-annotation",
  154. "head_sha": self.head_sha,
  155. "status": "completed", # queued, in_progress, completed
  156. "conclusion": conclusion,
  157. # "started_at": dt.datetime.now(dt.timezone.utc).isoformat(),
  158. "completed_at": dt.datetime.now(dt.timezone.utc).isoformat(),
  159. "output": output,
  160. }
  161. # Create the check-run
  162. response = requests.post(
  163. f"{self.URI}/repos/{self.repo_full_name}/check-runs",
  164. headers={
  165. "Accept": self.ACCEPT_HEADER_VALUE,
  166. "Authorization": self.AUTH_HEADER_VALUE,
  167. "X-GitHub-Api-Version": self.API_VERSION,
  168. },
  169. json=payload,
  170. timeout=30,
  171. )
  172. print(response.content)
  173. response.raise_for_status()
  174. ANY_REGEX = r".*?"
  175. FILE_REGEX = r"\s*(?P<file_name>\S.*?)\s*?"
  176. FILEGROUP_REGEX = r"\s*(?P<file_group>\S.*?)\s*?"
  177. EOL_REGEX = r"[\r\n]"
  178. LINE_REGEX = r"\s*(?P<line>\d+?)\s*?"
  179. COLUMN_REGEX = r"\s*(?P<column>\d+?)\s*?"
  180. SEVERITY_REGEX = r"\s*(?P<severity>error|warning|notice|style|info)\s*?"
  181. MSG_REGEX = r"\s*(?P<message>.+?)\s*?"
  182. MULTILINE_MSG_REGEX = r"\s*(?P<message>(?:.|.[\r\n])+)"
  183. # cpplint confidence index
  184. CONFIDENCE_REGEX = r"\s*\[(?P<confidence>\d+)\]\s*?"
  185. # List of message patterns, add more specific patterns earlier in the list
  186. # Creating patterns by using constants makes them easier to define and read.
  187. PATTERNS = [
  188. # beautysh
  189. # File ftp.sh: error: "esac" before "case" in line 90.
  190. re.compile(
  191. f"^File {FILE_REGEX}:{SEVERITY_REGEX}:"
  192. f" {MSG_REGEX} in line {LINE_REGEX}.$"
  193. ),
  194. # beautysh
  195. # File socks4echo.sh: error: indent/outdent mismatch: -2.
  196. re.compile(f"^File {FILE_REGEX}:{SEVERITY_REGEX}: {MSG_REGEX}$"),
  197. # yamllint
  198. # ##[group].pre-commit-config.yaml
  199. # ##[error]97:14 [trailing-spaces] trailing spaces
  200. # ##[endgroup]
  201. re.compile(rf"^##\[group\]{FILEGROUP_REGEX}$"), # Start file group
  202. re.compile(
  203. rf"^##\[{SEVERITY_REGEX}\]{LINE_REGEX}:{COLUMN_REGEX}{MSG_REGEX}$"
  204. ), # Msg
  205. re.compile(r"^##(?P<file_endgroup>\[endgroup\])$"), # End file group
  206. # File socks4echo.sh: error: indent/outdent mismatch: -2.
  207. re.compile(f"^File {FILE_REGEX}:{SEVERITY_REGEX}: {MSG_REGEX}$"),
  208. # Emacs style
  209. # path/to/file:845:5: error - Expected 1 space after closing brace
  210. re.compile(
  211. rf"^{FILE_REGEX}:{LINE_REGEX}:{COLUMN_REGEX}:{SEVERITY_REGEX}"
  212. rf"-?\s{MSG_REGEX}$"
  213. ),
  214. # ESLint (JavaScript Linter), RoboCop, shellcheck
  215. # path/to/file.js:10:2: Some linting issue
  216. # path/to/file.rb:10:5: Style/Indentation: Incorrect indentation detected
  217. # path/to/script.sh:10:1: SC2034: Some shell script issue
  218. re.compile(f"^{FILE_REGEX}:{LINE_REGEX}:{COLUMN_REGEX}: {MSG_REGEX}$"),
  219. # Cpplint default output:
  220. # '%s:%s: %s [%s] [%d]\n'
  221. # % (filename, linenum, message, category, confidence)
  222. re.compile(f"^{FILE_REGEX}:{LINE_REGEX}:{MSG_REGEX}{CONFIDENCE_REGEX}$"),
  223. # MSVC
  224. # file.cpp(10): error C1234: Some error message
  225. re.compile(
  226. f"^{FILE_REGEX}\\({LINE_REGEX}\\):{SEVERITY_REGEX}{MSG_REGEX}$"
  227. ),
  228. # Java compiler
  229. # File.java:10: error: Some error message
  230. re.compile(f"^{FILE_REGEX}:{LINE_REGEX}:{SEVERITY_REGEX}:{MSG_REGEX}$"),
  231. # Python
  232. # File ".../logToCs.py", line 90 (note: code line follows)
  233. re.compile(f'^File "{FILE_REGEX}", line {LINE_REGEX}$'),
  234. # Pylint, others
  235. # path/to/file.py:10: [C0111] Missing docstring
  236. # others
  237. re.compile(f"^{FILE_REGEX}:{LINE_REGEX}: {MSG_REGEX}$"),
  238. # Shellcheck:
  239. # In script.sh line 76:
  240. re.compile(
  241. f"^In {FILE_REGEX} line {LINE_REGEX}:{EOL_REGEX}?"
  242. f"({MULTILINE_MSG_REGEX})?{EOL_REGEX}{EOL_REGEX}"
  243. ),
  244. # eslint:
  245. # /path/to/filename
  246. # 14:5 error Unexpected trailing comma comma-dangle
  247. re.compile(
  248. f"^{FILE_REGEX}{EOL_REGEX}"
  249. rf"\s+{LINE_REGEX}:{COLUMN_REGEX}\s+{SEVERITY_REGEX}\s+{MSG_REGEX}$"
  250. ),
  251. ]
# Exceptionally, some regexes match messages that are not errors.
# This pattern matches those exceptions; parse_file() skips a match whose
# message is found by it.
EXCLUDE_MSG_PATTERN = re.compile(
    r"^("
    r"Placeholder pattern"  # To remove on first message pattern
    r")"
)

# Exceptionally, some regexes capture text as a file name that is not one.
# This pattern matches those exceptions; parse_file() skips a match whose
# file name is found by it.
EXCLUDE_FILE_PATTERN = re.compile(
    r"^("
    # Codespell:  (appears as a file name):
    r"Used config files\b"
    r")"
)
# Severities available in CodeSniffer report format
# The parsers normalise every notice to one of these three values.
SEVERITY_NOTICE = "notice"
SEVERITY_WARNING = "warning"
SEVERITY_ERROR = "error"
  271. def strip_ansi(text: str):
  272. """
  273. Strip ANSI escape sequences from string (colors, etc)
  274. """
  275. return re.sub(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])", "", text)
  276. def parse_file(text):
  277. """
  278. Parse all messages in a file
  279. Returns the fields in a dict.
  280. """
  281. # pylint: disable=too-many-branches,too-many-statements
  282. # regex required to allow same group names
  283. try:
  284. import regex # pylint: disable=import-outside-toplevel
  285. except ImportError as exc:
  286. raise ImportError(
  287. "The 'parsefile' method requires 'python -m pip install regex'"
  288. ) from exc
  289. patterns = [pattern.pattern for pattern in PATTERNS]
  290. # patterns = [PATTERNS[0].pattern]
  291. file_group = None # The file name for the group (if any)
  292. full_regex = "(?:(?:" + (")|(?:".join(patterns)) + "))"
  293. results = []
  294. for fields in regex.finditer(
  295. full_regex, strip_ansi(text), regex.MULTILINE | regex.IGNORECASE
  296. ):
  297. if not fields:
  298. continue
  299. result = fields.groupdict()
  300. if len(result) == 0:
  301. continue
  302. severity = result.get("severity", None)
  303. file_name = result.get("file_name", None)
  304. confidence = result.pop("confidence", None)
  305. new_file_group = result.pop("file_group", None)
  306. file_endgroup = result.pop("file_endgroup", None)
  307. message = result.get("message", None)
  308. if new_file_group is not None:
  309. # Start of file_group, just store file
  310. file_group = new_file_group
  311. continue
  312. if file_endgroup is not None:
  313. file_group = None
  314. continue
  315. if file_name is None:
  316. if file_group is not None:
  317. file_name = file_group
  318. result["file_name"] = file_name
  319. else:
  320. # No filename, skip
  321. continue
  322. else:
  323. if EXCLUDE_FILE_PATTERN.search(file_name):
  324. # This file_name is excluded
  325. continue
  326. if message is not None:
  327. if EXCLUDE_MSG_PATTERN.search(message):
  328. # This message is excluded
  329. continue
  330. if confidence is not None:
  331. # Convert confidence level of cpplint
  332. # to warning, etc.
  333. confidence = int(confidence)
  334. if confidence <= 1:
  335. severity = SEVERITY_NOTICE
  336. elif confidence >= 5:
  337. severity = SEVERITY_ERROR
  338. else:
  339. severity = SEVERITY_WARNING
  340. if severity is None:
  341. severity = SEVERITY_ERROR
  342. else:
  343. severity = severity.lower()
  344. if severity in ["info", "style"]:
  345. severity = SEVERITY_NOTICE
  346. result["severity"] = severity
  347. results.append(result)
  348. return results
  349. def parse_message(message):
  350. """
  351. Parse message until it matches a pattern.
  352. Returns the fields in a dict.
  353. """
  354. for pattern in PATTERNS:
  355. fields = pattern.match(message, re.IGNORECASE)
  356. if not fields:
  357. continue
  358. result = fields.groupdict()
  359. if len(result) == 0:
  360. continue
  361. if "confidence" in result:
  362. # Convert confidence level of cpplint
  363. # to warning, etc.
  364. confidence = int(result["confidence"])
  365. del result["confidence"]
  366. if confidence <= 1:
  367. severity = SEVERITY_NOTICE
  368. elif confidence >= 5:
  369. severity = SEVERITY_ERROR
  370. else:
  371. severity = SEVERITY_WARNING
  372. result["severity"] = severity
  373. if "severity" not in result:
  374. result["severity"] = SEVERITY_ERROR
  375. else:
  376. result["severity"] = result["severity"].lower()
  377. if result["severity"] in ["info", "style"]:
  378. result["severity"] = SEVERITY_NOTICE
  379. return result
  380. # Nothing matched
  381. return None
  382. def add_error_entry( # pylint: disable=too-many-arguments
  383. root,
  384. severity,
  385. file_name,
  386. line=None,
  387. column=None,
  388. message=None,
  389. source=None,
  390. root_path=None,
  391. ):
  392. """
  393. Add error information to the CheckStyle output being created.
  394. """
  395. file_element = find_or_create_file_element(
  396. root, file_name, root_path=root_path
  397. )
  398. error_element = ET.SubElement(file_element, "error")
  399. error_element.set("severity", severity)
  400. if line:
  401. error_element.set("line", line)
  402. if column:
  403. error_element.set("column", column)
  404. if message:
  405. error_element.set("message", message)
  406. if source:
  407. # To verify if this is a valid attribute
  408. error_element.set("source", source)
  409. def find_or_create_file_element(root, file_name: str, root_path=None):
  410. """
  411. Find/create file element in XML document tree.
  412. """
  413. if root_path is not None:
  414. file_name = remove_prefix(file_name, root_path)
  415. for file_element in root.findall("file"):
  416. if file_element.get("name") == file_name:
  417. return file_element
  418. file_element = ET.SubElement(root, "file")
  419. file_element.set("name", file_name)
  420. return file_element
  421. def main():
  422. """
  423. Parse the script arguments and get the conversion done.
  424. """
  425. parser = argparse.ArgumentParser(
  426. description="Convert messages to Checkstyle XML format."
  427. )
  428. parser.add_argument(
  429. "input", help="Input file. Use '-' for stdin.", nargs="?", default="-"
  430. )
  431. parser.add_argument(
  432. "output",
  433. help="Output file. Use '-' for stdout.",
  434. nargs="?",
  435. default="-",
  436. )
  437. parser.add_argument(
  438. "-i",
  439. "--in",
  440. dest="input_named",
  441. help="Input filename. Overrides positional input.",
  442. )
  443. parser.add_argument(
  444. "-o",
  445. "--out",
  446. dest="output_named",
  447. help="Output filename. Overrides positional output.",
  448. )
  449. parser.add_argument(
  450. "--root",
  451. metavar="ROOT_PATH",
  452. help="Root directory to remove from file paths."
  453. " Defaults to working directory.",
  454. default=os.getcwd(),
  455. )
  456. parser.add_argument(
  457. "--github-annotate",
  458. action=argparse.BooleanOptionalAction,
  459. help="Annotate when in Github workflow.",
  460. # Currently disabled,
  461. # Future: (os.environ.get("GITHUB_EVENT_PATH", None) is not None),
  462. default=False,
  463. )
  464. args = parser.parse_args()
  465. if args.input == "-" and args.input_named:
  466. with open(
  467. args.input_named, encoding="utf_8", errors="surrogateescape"
  468. ) as input_file:
  469. text = input_file.read()
  470. elif args.input != "-":
  471. with open(
  472. args.input, encoding="utf_8", errors="surrogateescape"
  473. ) as input_file:
  474. text = input_file.read()
  475. else:
  476. text = sys.stdin.read()
  477. root_path = os.path.join(args.root, "")
  478. try:
  479. notices = convert_text_to_notices(text)
  480. except ImportError:
  481. notices = convert_lines_to_notices(re.split(r"[\r\n]+", text))
  482. checkstyle_xml = convert_notices_to_checkstyle(
  483. notices, root_path=root_path
  484. )
  485. if args.output == "-" and args.output_named:
  486. with open(args.output_named, "w", encoding="utf_8") as output_file:
  487. output_file.write(checkstyle_xml)
  488. elif args.output != "-":
  489. with open(args.output, "w", encoding="utf_8") as output_file:
  490. output_file.write(checkstyle_xml)
  491. else:
  492. print(checkstyle_xml)
  493. if args.github_annotate:
  494. checkrun = CheckRun()
  495. checkrun.submit(notices)
  496. if __name__ == "__main__":
  497. main()