feat: 增加判断题识别

This commit is contained in:
chuan
2026-06-03 00:08:41 +08:00
Unverified
parent d1ffa9ba05
commit ea74eaff0b
+63 -2
View File
@@ -242,25 +242,48 @@ class AnswerRunner:
)
text = body.inner_text(timeout=self.settings.timeout_ms)
match = re.search(r"正确答案[:]\s*([A-Z,,、\s]+)", text)
match = re.search(r"正确答案[:]\s*([^\r\n]+)", text)
if not match:
self._print_debug_page(page, "Correct answer regex did not match")
raise SystemExit("Correct answer not found after opening answer analysis.")
return "".join(re.findall(r"[A-Z]", match.group(1).upper()))
raw_answer = match.group(1).strip()
letter_answer = "".join(re.findall(r"[A-Z]", raw_answer.upper()))
return letter_answer or raw_answer
def _select_answer_options(self, page, answer: str) -> None:
    """Click the option(s) on ``page`` that correspond to ``answer``.

    An answer consisting solely of uppercase ASCII letters is treated as one
    option letter per character. Any other non-empty answer is handed to
    ``_select_text_answer_option``.

    Args:
        page: Page object the options are looked up on.
        answer: Option letters (e.g. ``"AC"``) or a literal option text.

    Raises:
        SystemExit: If ``answer`` is empty, or no element can be located
            for one of the option letters.
    """
    if not answer:
        raise SystemExit("Empty answer.")

    is_letter_answer = re.fullmatch(r"[A-Z]+", answer) is not None
    if not is_letter_answer:
        self._select_text_answer_option(page, answer)
        return

    for choice in answer:
        # Matches text beginning with the letter and a dot, e.g. "A." or " A .".
        label = re.compile(rf"^\s*{choice}\s*\.")

        # First look for an <li> whose text matches the label.
        target = page.locator("li").filter(has_text=label)
        if target.count() == 0:
            # Fallback: the parent of whichever element carries the label text.
            target = page.get_by_text(label).locator("..")
        if target.count() == 0:
            self._print_debug_page(page, f"Option not found: {choice}")
            raise SystemExit(f"Option not found: {choice}")

        target.first.click()
        # Short pause after each click (presumably lets the page register it).
        time.sleep(0.2)
def _select_text_answer_option(self, page, answer: str) -> None:
    """Click the first clickable ``li`` whose stripped text equals ``answer``.

    Used for answers that are not plain option letters. List items whose
    text cannot be read within one second are skipped. If a matching item
    is found but clicking it fails, later matching items are still tried,
    and the failure is reported as a click problem rather than as a
    missing option.

    Args:
        page: Page object the ``li`` elements are looked up on.
        answer: Exact option text to match (compared after stripping the
            item's surrounding whitespace).

    Raises:
        SystemExit: If no ``li`` matches ``answer``, or every matching
            ``li`` failed to be clicked.
    """
    options = page.locator("li")
    click_error = None

    for index in range(options.count()):
        option = options.nth(index)

        # Only the text read is treated as "skip this item on failure".
        try:
            option_text = option.inner_text(timeout=1000).strip()
        except Exception:
            continue
        if option_text != answer:
            continue

        # The click is handled separately so a failure on the matching item
        # is remembered instead of being mistaken for "no such option".
        try:
            option.click()
        except Exception as error:
            click_error = error
            continue

        time.sleep(0.2)
        return

    if click_error is not None:
        self._print_debug_page(
            page, f"Text answer option not clickable: {answer} ({click_error})"
        )
        raise SystemExit(
            f"Text answer option not clickable: {answer}"
        ) from click_error

    self._print_debug_page(page, f"Text answer option not found: {answer}")
    raise SystemExit(f"Text answer option not found: {answer}")
def _go_next_question(self, page) -> bool:
before = self._current_question_no(page)
next_button = page.get_by_text("下一题", exact=True)
@@ -280,6 +303,7 @@ class AnswerRunner:
)
return True
except Exception:
self._print_debug_page(page, "Next question did not advance")
return False
def _current_question_no(self, page) -> str:
@@ -356,6 +380,43 @@ class AnswerRunner:
)
state_path.write_text(json.dumps(data, ensure_ascii=False, indent=2))
def _print_debug_page(self, page, reason: str) -> None:
    """Print a diagnostic dump of ``page`` to stdout.

    The dump contains the given reason, the page URL and title, the body
    text, the text of up to 80 elements for each of several selectors
    (buttons, list items, tabs, inputs) and the first 5000 characters of
    the body's outer HTML. Each page read that performs a lookup is
    guarded, so a failure is printed inline instead of aborting the dump.

    Args:
        page: Page object to inspect.
        reason: Short description of why the dump is being produced.
    """
    print("\n===== DEBUG PAGE START =====")
    print(f"reason: {reason}")
    print(f"url: {page.url}")

    # This helper runs on error paths, so a failing title lookup must not
    # replace the caller's intended outcome with an unrelated exception.
    try:
        title = page.title()
    except Exception as error:
        title = f"<failed to read title: {error}>"
    print(f"title: {title}")

    try:
        body_text = page.locator("body").inner_text(timeout=3000)
    except Exception as error:
        body_text = f"<failed to read body text: {error}>"
    print("----- body text -----")
    print(body_text)

    for selector, label in [
        ("button", "buttons"),
        ("li", "listitems"),
        ("[role='tab']", "tabs"),
        ("input", "inputs"),
    ]:
        print(f"----- {label} -----")
        try:
            items = page.locator(selector)
            # Cap the listing at 80 entries per selector.
            for index in range(min(items.count(), 80)):
                text = items.nth(index).inner_text(timeout=1000).strip()
                print(f"{index}: {text!r}")
        except Exception as error:
            print(f"<failed to read {label}: {error}>")

    print("----- html snippet -----")
    try:
        html = page.locator("body").evaluate("el => el.outerHTML")
        # Only the first 5000 characters of the markup are printed.
        print(html[:5000])
    except Exception as error:
        print(f"<failed to read html: {error}>")
    print("===== DEBUG PAGE END =====\n")
def _print_page_summary(self, page) -> None:
text = page.locator("body").inner_text(timeout=self.settings.timeout_ms)
summary = " ".join(text.split())[:1000]