feat(text selector): normalize whitespace for quoted match (#5049)

This changes quoted text selectors like `text="Foo Bar"` to perform a normalized-whitespace match. Most of the time users want to match some string visible on the page, and that always means normalized whitespace. We keep the case-sensitivity and full-string vs. substring differences between quoted and unquoted matches.
2021-01-19 09:30:34 -08:00 · 2021-01-19 09:30:34 -08:00 · 0586c2554f
parent 9e3bd78649
commit 0586c2554f
3 changed files with 25 additions and 20 deletions
--- a/docs/src/selectors.md
+++ b/docs/src/selectors.md
@ -25,13 +25,10 @@ await page.click("text=Log in")
 page.click("text=Log in")
 ```

-By default, the match is case-insensitive, it ignores leading/trailing whitespace and searches for
-a substring. This means `text= Login` matches `<button>Button loGIN (click me)</button>`.
+Matching is case-insensitive and searches for a substring. This means `text=Login` matches `<button>Button loGIN (click me)</button>`. Matching also normalizes whitespace, for example it turns multiple spaces into one, turns line breaks into spaces and ignores leading and trailing whitespace.

-Text body can be escaped with single or double quotes for precise matching, insisting on exact match,
-including specified whitespace and case. This means `text="Login "` will only match
-`<button>Login </button>` with exactly one space after "Login". Quoted text follows the usual escaping
-rules, e.g. use `\"` to escape double quote in a double-quoted string: `text="foo\"bar"`.
+Text body can be escaped with single or double quotes for a full-string, case-sensitive match instead. This means `text="Login"` will match `<button>Login</button>`, but not `<button>Login (click me)</button>` or `<button>login</button>`. Quoted text follows the usual escaping
+rules, e.g. use `\"` to escape a double quote in a double-quoted string: `text="foo\"bar"`. Note that a quoted match still normalizes whitespace.

 Text body can also be a JavaScript-like regex wrapped in `/` symbols. This means `text=/^\\s*Login$/i`
 will match `<button> loGIN</button>` with any number of spaces before "Login" and no spaces after.
@ -156,7 +153,7 @@ The `:text` pseudo-class matches elements that have a text node child with speci
 It is similar to the [text] engine, but can be used in combination with other `css` selector extensions.
 There are a few variations that support different arguments:

-* `:text("substring")` - Matches when element's text contains "substring" somewhere. Matching is case-insensitive. Matching also normalizes whitespace, for example it turns multiple spaces into one, trusn line breaks into spaces and ignores leading and trailing whitespace.
+* `:text("substring")` - Matches when element's text contains "substring" somewhere. Matching is case-insensitive. Matching also normalizes whitespace, for example it turns multiple spaces into one, turns line breaks into spaces and ignores leading and trailing whitespace.
 * `:text-is("string")` - Matches when element's text equals the "string". Matching is case-insensitive and normalizes whitespace.
 * `button:text("Sign in")` - Text selector may be combined with regular CSS.
 * `:text-matches("[+-]?\\d+")` - Matches text against a regular expression. Note that special characters like back-slash `\`, quotes `"`, square brackets `[]` and more should be escaped. Learn more about [regular expressions](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp).
--- a/src/server/injected/textSelectorEngine.ts
+++ b/src/server/injected/textSelectorEngine.ts
@ -46,21 +46,29 @@ function unescape(s: string): string {

 type Matcher = (text: string) => boolean;
 function createMatcher(selector: string): Matcher {
-  if (selector.length > 1 && selector[0] === '"' && selector[selector.length - 1] === '"') {
-    const parsed = unescape(selector.substring(1, selector.length - 1));
-    return text => text === parsed;
-  }
-  if (selector.length > 1 && selector[0] === "'" && selector[selector.length - 1] === "'") {
-    const parsed = unescape(selector.substring(1, selector.length - 1));
-    return text => text === parsed;
-  }
  if (selector[0] === '/' && selector.lastIndexOf('/') > 0) {
    const lastSlash = selector.lastIndexOf('/');
    const re = new RegExp(selector.substring(1, lastSlash), selector.substring(lastSlash + 1));
    return text => re.test(text);
  }
-  selector = selector.trim().toLowerCase().replace(/\s+/g, ' ');
-  return text => text.toLowerCase().replace(/\s+/g, ' ').includes(selector);
+  let strict = false;
+  if (selector.length > 1 && selector[0] === '"' && selector[selector.length - 1] === '"') {
+    selector = unescape(selector.substring(1, selector.length - 1));
+    strict = true;
+  }
+  if (selector.length > 1 && selector[0] === "'" && selector[selector.length - 1] === "'") {
+    selector = unescape(selector.substring(1, selector.length - 1));
+    strict = true;
+  }
+  selector = selector.trim().replace(/\s+/g, ' ');
+  if (!strict)
+    selector = selector.toLowerCase();
+  return text => {
+    text = text.trim().replace(/\s+/g, ' ');
+    if (!strict)
+      return text.toLowerCase().includes(selector);
+    return text === selector;
+  };
 }

 // Skips <head>, <script> and <style> elements and all their children.
--- a/test/selectors-text.spec.ts
+++ b/test/selectors-text.spec.ts
@ -26,7 +26,7 @@ it('should work', async ({page}) => {
  expect(await page.$eval(`text=ye`, e => e.outerHTML)).toBe('<div>\nye  </div>');

  await page.setContent(`<div> ye </div><div>ye</div>`);
-  expect(await page.$eval(`text="ye"`, e => e.outerHTML)).toBe('<div>ye</div>');
+  expect(await page.$eval(`text="ye"`, e => e.outerHTML)).toBe('<div> ye </div>');

  await page.setContent(`<div>yo</div><div>"ya</div><div> hello world! </div>`);
  expect(await page.$eval(`text="\\"ya"`, e => e.outerHTML)).toBe('<div>"ya</div>');
@ -98,9 +98,9 @@ it('should work', async ({page}) => {
  await page.setContent(`<span>Sign&nbsp;in</span><span>Hello\n \nworld</span>`);
  expect(await page.$eval(`text=Sign in`, e => e.outerHTML)).toBe('<span>Sign&nbsp;in</span>');
  expect((await page.$$(`text=Sign \tin`)).length).toBe(1);
-  expect(await page.$(`text="Sign in"`)).toBe(null);
-  expect((await page.$$(`text="Sign in"`)).length).toBe(0);
+  expect((await page.$$(`text="Sign in"`)).length).toBe(1);
  expect(await page.$eval(`text=lo wo`, e => e.outerHTML)).toBe('<span>Hello\n \nworld</span>');
+  expect(await page.$eval(`text="Hello world"`, e => e.outerHTML)).toBe('<span>Hello\n \nworld</span>');
  expect(await page.$(`text="lo wo"`)).toBe(null);
  expect((await page.$$(`text=lo \nwo`)).length).toBe(1);
  expect((await page.$$(`text="lo wo"`)).length).toBe(0);