diff --git a/mjtest/cli.py b/mjtest/cli.py
index 2c07f6d5b64a22e64a45c7b100ba630e645f8311..b0fae8377692b5a79364bd572d9d57cf609ff15e 100644
--- a/mjtest/cli.py
+++ b/mjtest/cli.py
@@ -55,6 +55,10 @@ if True:#__name__ == '__main__':
     parser.add_argument("--ci_testing", action="store_true", default=False,
                         help="In mode X the succeeding test cases of later modes/phases should also succeed in "
                              "this mode, and failing test cases of prior modes/phases should also fail in this phase.")
+    parser.add_argument("--bench_compiler_flags", action="store", default=[], type=str, nargs=2,
+                        help="Set the two compiler flags/modes to compare; use 'javac' for the Java compiler")
+    parser.add_argument("--bench_runs", action="store", type=int, default=10,
+                        help="Number of times to run each benchmarked program")
     #parser.add_argument("--timeout", action="store_const", default=30, const="timeout",
     #                    help="Abort a program after TIMEOUT seconds")
     #parser.add_argument("--report_dir", action="store_const", default="", const="report_dir",
diff --git a/mjtest/environment.py b/mjtest/environment.py
index 57400722e9cab5b25b1416e47696cb97c83afd15..25054e686ee73e8ca7d9df4eeed7cf703db01031 100644
--- a/mjtest/environment.py
+++ b/mjtest/environment.py
@@ -4,6 +4,7 @@ import random
 import shlex
 import shutil
 import tempfile
+import sys
 from datetime import datetime
 import time
 from threading import Timer
@@ -33,13 +34,17 @@ class TestMode:
     exec = "exec"
 
+    bench = "bench"
+
     USE_TESTS_OF_OTHER = {
         ast: [syntax],
-        compile_firm: [exec]
+        compile_firm: [exec],
+        bench: [exec]
     }
     """ All 'success' tests of the n.th mode can used as 'success' tests for the n-1.th mode"""
 
 
-TEST_MODES = [TestMode.lexer, TestMode.syntax, TestMode.ast, TestMode.semantic, TestMode.compile_firm, TestMode.exec]
+TEST_MODES = [TestMode.lexer, TestMode.syntax, TestMode.ast, TestMode.semantic, TestMode.compile_firm,
+              TestMode.exec, TestMode.bench]
 
 
 def get_test_dirname(mode: str) -> str:
@@ -66,7 +71,8 @@ class Environment:
                  produce_no_reports: bool = True, output_no_incorrect_reports: bool = False,
                  produce_all_reports: bool = False, report_subdir: str = None,
                  ci_testing: bool = False, color: bool = False,
-                 all_exec_tests: bool = True):
+                 all_exec_tests: bool = True, bench_compiler_flags: List[str] = [],
+                 bench_runs: int = 10):
         if color:
             force_colored_output()
         self.mode = mode
@@ -119,6 +125,9 @@ class Environment:
         self.timeout = float(os.getenv("MJ_TIMEOUT", "10"))
         self.big_timeout = float(os.getenv("MJ_BIG_TIMEOUT", "60"))
 
+        self.bench_compiler_flags = bench_compiler_flags
+        self.bench_runs = bench_runs
+
     def create_tmpfile(self) -> str:
         self._tmp_file_ctr += 1
         return os.path.join(self.tmp_dir, str(round(time.time() * 100000))
diff --git a/mjtest/test/bench.py b/mjtest/test/bench.py
new file mode 100644
index 0000000000000000000000000000000000000000..55b8f463aa9cd656c285f387f45217d5de6f52b0
--- /dev/null
+++ b/mjtest/test/bench.py
@@ -0,0 +1,146 @@
+import logging
+import math
+import os
+import shutil
+import subprocess
+import time
+from os import path
+from typing import List
+
+from mjtest.environment import TestMode, Environment
+from mjtest.test.syntax_tests import BasicSyntaxTest
+from mjtest.test.tests import TestCase, ExtensibleTestResult
+from mjtest.util.shell import SigKill
+from mjtest.util.utils import get_main_class_name
+
+_LOG = logging.getLogger("bench_tests")
+
+class _RunResult:
+
+    def __init__(self, runs: List[float], is_correct: bool):
+        self.runs = runs
+        self.is_correct = is_correct
+
+    def mean(self) -> float:
+        return sum(self.runs) / self.number()
+
+    def stddev(self) -> float:
+        m = self.mean()
+        return math.sqrt(sum(map(lambda x: (x - m) ** 2, self.runs)) / self.number())
+
+    def min(self) -> float:
+        return min(self.runs)
+
+    def number(self) -> int:
+        return len(self.runs)
+
+
+class BenchExecTest(BasicSyntaxTest):
+    """
+    Simple benchmark test. The new compiler mode shouldn't be slower than the old ones (or javac).
+    """
+
+    FILE_ENDINGS = [".java", ".mj"]
+    INVALID_FILE_ENDINGS = [".inf.java", ".inf.mj"]
+    MODE = TestMode.compile_firm
+
+    def __init__(self, env: Environment, type: str, file: str, preprocessed_file: str):
+        super().__init__(env, type, file, preprocessed_file)
+        self._should_succeed = True
+
+    def _bench_command(self, cmd: str, *args: str) -> _RunResult:
+        runs = []  # type: List[float]
+        for _ in range(self.env.bench_runs):
+            try:
+                start = time.time()
+                subprocess.check_call([cmd] + list(args), stdout=subprocess.DEVNULL)
+                runs.append(time.time() - start)
+            except subprocess.CalledProcessError:
+                return _RunResult([], False)
+        return _RunResult(runs, True)
+
+    def run(self) -> ExtensibleTestResult:
+        is_big_testcase = "big" in self.file
+        timeout = self.env.big_timeout if is_big_testcase else self.env.timeout
+        base_filename = path.basename(self.file).split(".")[0]
+        tmp_dir = self.env.create_pid_local_tmpdir()
+        shutil.copy(self.preprocessed_file, path.join(tmp_dir, base_filename + ".java"))
+        cwd = os.getcwd()
+        os.chdir(tmp_dir)
+
+        test_result = ExtensibleTestResult(self)
+
+        results = []  # type: List[_RunResult]
+
+        for compiler_flag in self.env.bench_compiler_flags:
+            if compiler_flag == "javac":
+                _, err, javac_rtcode = \
+                    self.env.run_command("javac", base_filename + ".java", timeout=timeout)
+                if javac_rtcode != 0:
+                    _LOG.error("File \"{}\" isn't valid Java".format(self.preprocessed_file))
+                    test_result.incorrect_msg = "file isn't valid Java"
+                    test_result.set_error_code(javac_rtcode)
+                    test_result.add_long_text("Javac error message", err.decode())
+                    test_result.add_file("Source file", self.preprocessed_file)
+                    os.chdir(cwd)
+                    return test_result
+                main_class = get_main_class_name(base_filename + ".java")
+                if not main_class:
+                    _LOG.debug("Can't find a main class, using the file name instead")
+                    main_class = base_filename
+                results.append(self._bench_command("java", main_class))
+            else:
+                try:
+                    compiler_flag = compiler_flag.replace("\\-", "-")
+                    out, err, rtcode = self.env.run_command(self.env.mj_run_cmd, compiler_flag, base_filename + ".java",
+                                                            timeout=timeout)
+                    if rtcode != 0:
+                        test_result.incorrect_msg = "file can't be compiled"
+                        test_result.set_error_code(rtcode)
+                        test_result.add_long_text("Error output", err.decode())
+                        test_result.add_long_text("Output", out.decode())
+                        test_result.add_file("Source file", self.preprocessed_file)
+                        os.chdir(cwd)
+                        return test_result
+                except SigKill as sig:
+                    test_result.incorrect_msg = "file can't be compiled: " + sig.name
+                    test_result.set_error_code(sig.retcode)
+                    test_result.add_file("Source file", self.preprocessed_file)
+                    os.chdir(cwd)
+                    return test_result
+                except:
+                    os.chdir(cwd)
+                    raise
+                results.append(self._bench_command("./a.out"))
+        os.chdir(cwd)
+        assert len(results) == 2
+        if not results[0].is_correct or not results[1].is_correct:
+            incorrect_flags = [self.env.bench_compiler_flags[i] for (i, res) in enumerate(results) if not res.is_correct]
+            test_result.incorrect_msg = "Running with {} failed".format(", ".join(incorrect_flags))
+            test_result.has_succeeded = False
+            return test_result
+        msg_parts = []
+        # noise estimate: the larger of the two relative standard deviations
+        stddev = max(results[0].stddev() / results[0].mean(), results[1].stddev() / results[1].mean())
+        rel_min = results[0].min() / results[1].min()
+        msg_parts.append("min(0/1) = {:.0%}".format(rel_min))
+        rel_mean = results[0].mean() / results[1].mean()
+        msg_parts.append("mean(0/1) = {:.0%}".format(rel_mean))
+        msg_parts.append("(mean(0/1) - 1) / std = {:.0%}".format((rel_mean - 1) / stddev))
+        for (i, res) in enumerate(results):
+            test_result.add_short_text("min({})".format(i), res.min())
+            test_result.add_short_text("mean({})".format(i), res.mean())
+            test_result.add_short_text("stddev({})".format(i), res.stddev())
+        # succeed only if the first mode is slower than the second by more than one relative stddev
+        if (rel_mean - 1) / stddev <= 1:
+            msg_parts.append("first not significantly slower")
+            test_result.incorrect_msg = ", ".join(msg_parts)
+            test_result.has_succeeded = False
+            return test_result
+        test_result.correct_msg = ", ".join(msg_parts)
+        test_result.has_succeeded = True
+        return test_result
+
+
+
+TestCase.TEST_CASE_CLASSES[TestMode.bench].append(BenchExecTest)
diff --git a/mjtest/test/tests.py b/mjtest/test/tests.py
index a5a1b6dede2b8745196e90d4a5b1d293543de9bb..7439b716de8f66460011b79840d8ecfa0f9aaf94 100644
--- a/mjtest/test/tests.py
+++ b/mjtest/test/tests.py
@@ -173,7 +173,7 @@
             print(colored("[{result:7s}] {tc:40s}".format(
                 result="SUCCESS" if ret.is_correct() else "FAIL",
                 tc=test_case.name()), color, attrs=["bold"]) +
-                  colored("" if ret.is_correct() else ret.short_message(), color))
+                  colored(ret.short_message(), color))
             try:
                 if self.env.produce_reports and (self.env.produce_all_reports or not ret.is_correct()):
                     if not exists(self.env.report_dir):
@@ -304,6 +304,7 @@ class ExtensibleTestResult(TestResult):
         self.incorrect_msg = None  # type: Optional[str]
         self.has_succeeded = True  # type: bool
         self._contains_error_str = True  # type: bool
+        self.correct_msg = "correct"  # type: str
 
     def add_error_output(self, title: str, error_output: str):
         """
@@ -334,7 +335,7 @@
 
     def short_message(self) -> str:
         if self.is_correct():
-            return "correct"
+            return self.correct_msg
         else:
             if not self.succeeded() and not self.test_case.should_succeed() and not self._contains_error_str:
                 return "the error output doesn't contain the word \"error\""
@@ -549,4 +550,5 @@ TestCase.TEST_CASE_CLASSES[TestMode.lexer].append(LexerDiffTest)
 import mjtest.test.syntax_tests
 import mjtest.test.ast_tests
 import mjtest.test.semantic_tests
-import mjtest.test.exec_tests
\ No newline at end of file
+import mjtest.test.exec_tests
+import mjtest.test.bench
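
A note on reading the patch: the pass/fail rule at the end of BenchExecTest.run is easy to misread, so here is a minimal, self-contained sketch of the same criterion. It is not part of the patch; the helper names mean, rel_stddev and second_significantly_faster are made up for illustration. They mirror _RunResult.mean, the relative standard deviation computed in run, and the final (rel_mean - 1) / stddev > 1 check.

    import math
    from typing import List

    def mean(xs: List[float]) -> float:
        return sum(xs) / len(xs)

    def rel_stddev(xs: List[float]) -> float:
        # population standard deviation, normalized by the mean
        # (matches _RunResult.stddev() divided by _RunResult.mean())
        m = mean(xs)
        return math.sqrt(sum((x - m) ** 2 for x in xs) / len(xs)) / m

    def second_significantly_faster(first: List[float], second: List[float]) -> bool:
        # the benchmark only passes when the first mode's mean runtime exceeds
        # the second's by more than one relative standard deviation
        noise = max(rel_stddev(first), rel_stddev(second))
        return (mean(first) / mean(second) - 1) / noise > 1

    # e.g. a ~30% slowdown of the first mode at ~1-2% noise passes:
    assert second_significantly_faster([1.30, 1.32, 1.28], [1.00, 1.02, 0.98])

Consequently, the first of the two --bench_compiler_flags values is the baseline that must lose: to compare against the JDK, pass "javac" first and the new mode's flag second. A leading dash in a flag is escaped as "\-" on the command line, which run undoes via replace("\\-", "-").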