From 120f0228c5617df254e3b2bd698bc59a85d2f0c9 Mon Sep 17 00:00:00 2001 From: Dmitry Gozman Date: Mon, 13 Nov 2023 11:30:16 -0800 Subject: [PATCH] feat(selector generator): try improving text candidate with heuristics (#28074) - Drop number-like prefixes and/or suffixes. - Trim long texts to a word boundary around 15-25 characters. --- .../src/server/injected/selectorGenerator.ts | 91 +++++++++++++++---- tests/library/selector-generator.spec.ts | 33 ++++++- 2 files changed, 104 insertions(+), 20 deletions(-) diff --git a/packages/playwright-core/src/server/injected/selectorGenerator.ts b/packages/playwright-core/src/server/injected/selectorGenerator.ts index e353ad1556..edeb2b5b8e 100644 --- a/packages/playwright-core/src/server/injected/selectorGenerator.ts +++ b/packages/playwright-core/src/server/injected/selectorGenerator.ts @@ -14,7 +14,7 @@ * limitations under the License. */ -import { cssEscape, escapeForAttributeSelector, escapeForTextSelector, normalizeWhiteSpace, quoteCSSAttributeValue, trimString } from '../../utils/isomorphic/stringUtils'; +import { cssEscape, escapeForAttributeSelector, escapeForTextSelector, normalizeWhiteSpace, quoteCSSAttributeValue } from '../../utils/isomorphic/stringUtils'; import { closestCrossShadow, isInsideScope, parentElementOrShadowHost } from './domUtils'; import type { InjectedScript } from './injectedScript'; import { getAriaRole, getElementAccessibleName, beginAriaCaches, endAriaCaches } from './roleUtils'; @@ -229,16 +229,18 @@ function buildNoTextCandidates(injectedScript: InjectedScript, element: Element, if (element.nodeName === 'INPUT' || element.nodeName === 'TEXTAREA') { const input = element as HTMLInputElement | HTMLTextAreaElement; if (input.placeholder) { - candidates.push({ engine: 'internal:attr', selector: `[placeholder=${escapeForAttributeSelector(input.placeholder, false)}]`, score: kPlaceholderScore }); candidates.push({ engine: 'internal:attr', selector: 
`[placeholder=${escapeForAttributeSelector(input.placeholder, true)}]`, score: kPlaceholderScoreExact }); + for (const alternative of suitableTextAlternatives(input.placeholder)) + candidates.push({ engine: 'internal:attr', selector: `[placeholder=${escapeForAttributeSelector(alternative.text, false)}]`, score: kPlaceholderScore - alternative.scoreBouns }); } } const labels = getElementLabels(injectedScript._evaluator._cacheText, element); for (const label of labels) { const labelText = label.full.trim(); - candidates.push({ engine: 'internal:label', selector: escapeForTextSelector(labelText, false), score: kLabelScore }); candidates.push({ engine: 'internal:label', selector: escapeForTextSelector(labelText, true), score: kLabelScoreExact }); + for (const alternative of suitableTextAlternatives(labelText)) + candidates.push({ engine: 'internal:label', selector: escapeForTextSelector(alternative.text, false), score: kLabelScore - alternative.scoreBouns }); } const ariaRole = getAriaRole(element); @@ -265,36 +267,43 @@ function buildTextCandidates(injectedScript: InjectedScript, element: Element, i return []; const candidates: SelectorToken[][] = []; - if (element.getAttribute('title')) { - candidates.push([{ engine: 'internal:attr', selector: `[title=${escapeForAttributeSelector(element.getAttribute('title')!, false)}]`, score: kTitleScore }]); - candidates.push([{ engine: 'internal:attr', selector: `[title=${escapeForAttributeSelector(element.getAttribute('title')!, true)}]`, score: kTitleScoreExact }]); + const title = element.getAttribute('title'); + if (title) { + candidates.push([{ engine: 'internal:attr', selector: `[title=${escapeForAttributeSelector(title, true)}]`, score: kTitleScoreExact }]); + for (const alternative of suitableTextAlternatives(title)) + candidates.push([{ engine: 'internal:attr', selector: `[title=${escapeForAttributeSelector(alternative.text, false)}]`, score: kTitleScore - alternative.scoreBouns }]); } - if (element.getAttribute('alt') 
&& ['APPLET', 'AREA', 'IMG', 'INPUT'].includes(element.nodeName)) { - candidates.push([{ engine: 'internal:attr', selector: `[alt=${escapeForAttributeSelector(element.getAttribute('alt')!, false)}]`, score: kAltTextScore }]); - candidates.push([{ engine: 'internal:attr', selector: `[alt=${escapeForAttributeSelector(element.getAttribute('alt')!, true)}]`, score: kAltTextScoreExact }]); + const alt = element.getAttribute('alt'); + if (alt && ['APPLET', 'AREA', 'IMG', 'INPUT'].includes(element.nodeName)) { + candidates.push([{ engine: 'internal:attr', selector: `[alt=${escapeForAttributeSelector(alt, true)}]`, score: kAltTextScoreExact }]); + for (const alternative of suitableTextAlternatives(alt)) + candidates.push([{ engine: 'internal:attr', selector: `[alt=${escapeForAttributeSelector(alternative.text, false)}]`, score: kAltTextScore - alternative.scoreBouns }]); } - const fullText = normalizeWhiteSpace(elementText(injectedScript._evaluator._cacheText, element).full); - const text = trimString(fullText, 80); + const text = normalizeWhiteSpace(elementText(injectedScript._evaluator._cacheText, element).full); if (text) { - const escaped = escapeForTextSelector(text, false); + const alternatives = suitableTextAlternatives(text); if (isTargetNode) { - candidates.push([{ engine: 'internal:text', selector: escaped, score: kTextScore }]); - candidates.push([{ engine: 'internal:text', selector: escapeForTextSelector(text, true), score: kTextScoreExact }]); + if (text.length <= 80) + candidates.push([{ engine: 'internal:text', selector: escapeForTextSelector(text, true), score: kTextScoreExact }]); + for (const alternative of alternatives) + candidates.push([{ engine: 'internal:text', selector: escapeForTextSelector(alternative.text, false), score: kTextScore - alternative.scoreBouns }]); } const cssToken: SelectorToken = { engine: 'css', selector: cssEscape(element.nodeName.toLowerCase()), score: kCSSTagNameScore }; - candidates.push([cssToken, { engine: 
'internal:has-text', selector: escaped, score: kTextScore }]); - if (fullText.length <= 80) - candidates.push([cssToken, { engine: 'internal:has-text', selector: '/^' + escapeRegExp(fullText) + '$/', score: kTextScoreRegex }]); + for (const alternative of alternatives) + candidates.push([cssToken, { engine: 'internal:has-text', selector: escapeForTextSelector(alternative.text, false), score: kTextScore - alternative.scoreBouns }]); + if (text.length <= 80) + candidates.push([cssToken, { engine: 'internal:has-text', selector: '/^' + escapeRegExp(text) + '$/', score: kTextScoreRegex }]); } const ariaRole = getAriaRole(element); if (ariaRole && !['none', 'presentation'].includes(ariaRole)) { const ariaName = getElementAccessibleName(element, false); if (ariaName) { - candidates.push([{ engine: 'internal:role', selector: `${ariaRole}[name=${escapeForAttributeSelector(ariaName, false)}]`, score: kRoleWithNameScore }]); candidates.push([{ engine: 'internal:role', selector: `${ariaRole}[name=${escapeForAttributeSelector(ariaName, true)}]`, score: kRoleWithNameScoreExact }]); + for (const alternative of suitableTextAlternatives(ariaName)) + candidates.push([{ engine: 'internal:role', selector: `${ariaRole}[name=${escapeForAttributeSelector(alternative.text, false)}]`, score: kRoleWithNameScore - alternative.scoreBouns }]); } } @@ -466,3 +475,49 @@ function escapeRegExp(s: string) { // From https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions#escaping return s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); // $& means the whole matched string } + +function trimWordBoundary(text: string, maxLength: number) { + if (text.length <= maxLength) + return text; + text = text.substring(0, maxLength); + // Find last word boundary in the text. 
+ const match = text.match(/^(.*)\b(.+?)$/); + if (!match) + return ''; + return match[1].trimEnd(); +} + +function suitableTextAlternatives(text: string) { + let result: { text: string, scoreBouns: number }[] = []; + + { + const match = text.match(/^([\d.,]+)[^.,\w]/); + const leadingNumberLength = match ? match[1].length : 0; + if (leadingNumberLength) { + const alt = text.substring(leadingNumberLength).trimStart(); + result.push({ text: alt, scoreBouns: alt.length <= 30 ? 2 : 1 }); + } + } + + { + const match = text.match(/[^.,\w]([\d.,]+)$/); + const trailingNumberLength = match ? match[1].length : 0; + if (trailingNumberLength) { + const alt = text.substring(0, text.length - trailingNumberLength).trimEnd(); + result.push({ text: alt, scoreBouns: alt.length <= 30 ? 2 : 1 }); + } + } + + if (text.length <= 30) { + result.push({ text, scoreBouns: 0 }); + } else { + result.push({ text: trimWordBoundary(text, 80), scoreBouns: 0 }); + result.push({ text: trimWordBoundary(text, 30), scoreBouns: 1 }); + } + + result = result.filter(r => r.text); + if (!result.length) + result.push({ text: text.substring(0, 80), scoreBouns: 0 }); + + return result; +} diff --git a/tests/library/selector-generator.spec.ts b/tests/library/selector-generator.spec.ts index 44093d0a5e..6b87b6171c 100644 --- a/tests/library/selector-generator.spec.ts +++ b/tests/library/selector-generator.spec.ts @@ -54,10 +54,38 @@ it.describe('selector generator', () => { }); it('should trim text', async ({ page }) => { - await page.setContent(`
Text0123456789Text0123456789Text0123456789Text0123456789Text0123456789Text0123456789Text0123456789Text0123456789Text0123456789Text0123456789
`); + await page.setContent(` +
Text0123456789Text0123456789Text0123456789Text0123456789Text0123456789Text0123456789Text0123456789Text0123456789Text0123456789Text0123456789
+
Text0123456789Text0123456789Text0123456789Text0123456789Text0123456789!Text0123456789Text0123456789Text0123456789Text0123456789Text0123456789
+ `); expect(await generate(page, 'div')).toBe('internal:text="Text0123456789Text0123456789Text0123456789Text0123456789Text0123456789Text012345"i'); }); + it('should try to improve role name', async ({ page }) => { + await page.setContent(`
Issues 23
`); + expect(await generate(page, 'div')).toBe('internal:role=button[name="Issues"i]'); + }); + + it('should try to improve text', async ({ page }) => { + await page.setContent(`
23 Issues
`); + expect(await generate(page, 'div')).toBe('internal:text="Issues"i'); + }); + + it('should try to improve text by shortening', async ({ page }) => { + await page.setContent(`
Longest verbose description of the item
`); + expect(await generate(page, 'div')).toBe('internal:text="Longest verbose description"i'); + }); + + it('should try to improve label text by shortening', async ({ page }) => { + await page.setContent(``); + expect(await generate(page, 'input')).toBe('internal:label="Longest verbose description"i'); + }); + + it('should not improve guid text', async ({ page }) => { + await page.setContent(`
91b1b23
`); + expect(await generate(page, 'div')).toBe('internal:text="91b1b23"i'); + }); + it('should not escape text with >>', async ({ page }) => { await page.setContent(`
text>>text
`); expect(await generate(page, 'div')).toBe('internal:text="text>>text"i'); @@ -206,9 +234,10 @@ it.describe('selector generator', () => {
Text that goes on and on and on and on and on and on and on and on and on and on and on and on and on and on and on
+
Text that goes on and on and on and on and on and on and on and on and X on and on and on and on and on and on and on
`); - expect(await generate(page, '#id > div')).toBe(`#id >> internal:text="Text that goes on and on and on and on and on and on and on and on and on and on"i`); + expect(await generate(page, '#id > div')).toBe(`#id >> internal:text="Text that goes on and on and on and on and on and on and on and on and on and"i`); }); it('should use nested ordinals', async ({ page }) => {