playwright/src/server/inspector/injected/selectorGenerator.ts
2021-01-24 08:44:11 -08:00

364 lines
14 KiB
TypeScript

/**
* Copyright (c) Microsoft Corporation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import type InjectedScript from '../../injected/injectedScript';
/**
 * Generates a selector for `targetElement`.
 *
 * Walks from the target up through its ancestors (crossing into shadow hosts),
 * asking `buildSelectorCandidate` for a token at each level and chaining the
 * tokens that narrow the match set. The first chain whose first match is the
 * target wins. If no chain qualifies, falls back to an XPath expression and
 * finally to a structural CSS selector.
 *
 * @param injectedScript Used to parse and evaluate candidate selectors.
 * @param targetElement Element to generate a selector for.
 * @returns The selector together with every element it currently matches.
 */
export function generateSelector(injectedScript: InjectedScript, targetElement: Element): { selector: string, elements: Element[] } {
  // Resolve everything against the target's own document, so the walk, the
  // queries and the fallbacks all agree on the same root.
  const ownerDocument = targetElement.ownerDocument;
  const rootElement = ownerDocument.documentElement;

  const path: SelectorToken[] = [];
  let numberOfMatchingElements = Number.MAX_SAFE_INTEGER;
  for (let element: Element | null = targetElement; element && element !== rootElement; element = parentElementOrShadowHost(element)) {
    const selector = buildSelectorCandidate(element);
    if (!selector)
      continue;
    const fullSelector = joinSelector([selector, ...path]);
    const parsedSelector = injectedScript.parseSelector(fullSelector);
    const selectorTargets = injectedScript.querySelectorAll(parsedSelector, ownerDocument);
    // Nothing matches: adding more ancestors cannot help, go to the fallbacks.
    if (!selectorTargets.length)
      break;
    if (selectorTargets[0] === targetElement)
      return { selector: fullSelector, elements: selectorTargets };
    // Keep the ancestor token only when it actually narrows the match set.
    if (numberOfMatchingElements > selectorTargets.length) {
      numberOfMatchingElements = selectorTargets.length;
      path.unshift(selector);
    }
  }

  if (rootElement === targetElement) {
    // Use an explicit engine prefix: a bare '/html' does not start with '//'
    // or '..', so it would not be auto-detected as XPath.
    return {
      selector: 'xpath=/html',
      elements: [rootElement]
    };
  }

  const selector =
      createXPath(rootElement, targetElement) ||
      cssSelectorForElement(injectedScript, targetElement);
  const parsedSelector = injectedScript.parseSelector(selector);
  return {
    selector,
    elements: injectedScript.querySelectorAll(parsedSelector, ownerDocument)
  };
}
/**
 * Picks the single best selector token for `element`, or `null` when the
 * element has nothing worth selecting on.
 *
 * Priority: test-id attributes, then `aria-label` / `role`, then
 * tag-specific attributes (`name`, `placeholder`, `type` for inputs and
 * textareas; `alt` for images), then the element's own text, and finally a
 * non-generated-looking `id`.
 */
function buildSelectorCandidate(element: Element): SelectorToken | null {
  // Always lowercase with the locale-independent toLowerCase(): a
  // locale-aware lowercase can map 'I' to a dotless 'ı' (e.g. Turkish) and
  // produce a tag name that matches nothing.
  const nodeName = element.nodeName.toLowerCase();
  const attributeToken = (attribute: string, value: string): SelectorToken => ({
    engine: 'css',
    selector: `${nodeName}[${attribute}=${quoteString(value)}]`,
  });

  // These attributes are used whenever present, even with an empty value.
  for (const attribute of ['data-testid', 'data-test-id', 'data-test', 'aria-label', 'role']) {
    const value = element.getAttribute(attribute);
    if (value !== null)
      return attributeToken(attribute, value);
  }

  // Tag-specific attributes are only used when they have a non-empty value.
  let tagSpecificAttributes: string[] = [];
  if (['INPUT', 'TEXTAREA'].includes(element.nodeName))
    tagSpecificAttributes = ['name', 'placeholder', 'type'];
  else if (element.nodeName === 'IMG')
    tagSpecificAttributes = ['alt'];
  for (const attribute of tagSpecificAttributes) {
    const value = element.getAttribute(attribute);
    if (value)
      return attributeToken(attribute, value);
  }

  const textSelector = textSelectorForElement(element);
  if (textSelector)
    return { engine: 'text', selector: textSelector };

  // De-prioritize id, but still use it as a last resort.
  const idAttr = element.getAttribute('id');
  if (idAttr && !isGuidLike(idAttr))
    return attributeToken('id', idAttr);
  return null;
}
/**
 * Returns the next element up from `element`: its parent element when it has
 * one, otherwise the host of the document fragment it is attached to (the
 * shadow-root case). Returns `null` when neither exists.
 */
function parentElementOrShadowHost(element: Element): Element | null {
  const directParent = element.parentElement;
  if (directParent)
    return directParent;

  const container = element.parentNode;
  const insideFragment = !!container && container.nodeType === Node.DOCUMENT_FRAGMENT_NODE;
  if (!insideFragment)
    return null;

  // Only fragments that expose a host (shadow roots) lead anywhere.
  const host = (container as ShadowRoot).host;
  return host ? host : null;
}
/**
 * Builds a structural CSS selector for `targetElement`, used as a last resort.
 *
 * Walks from the target towards the document, choosing one token per level
 * (id, then a class combination, then tag / `:nth-child`). It returns as soon
 * as the accumulated descendant-combinator path selects the target as its
 * first match.
 *
 * @param injectedScript Used to parse and evaluate candidate selectors.
 * @param targetElement Element to generate a selector for.
 * @returns A CSS selector; when no path resolves to the target first, the
 *   full accumulated path is returned as a best effort.
 */
function cssSelectorForElement(injectedScript: InjectedScript, targetElement: Element): string {
  const root: Node = targetElement.ownerDocument;
  const tokens: string[] = [];

  // Returns `prefix + tokens` as a selector if its first match is the target.
  function uniqueCSSSelector(prefix?: string): string | undefined {
    const path = tokens.slice();
    if (prefix)
      path.unshift(prefix);
    const selector = path.join(' ');
    const parsedSelector = injectedScript.parseSelector(selector);
    const node = injectedScript.querySelector(parsedSelector, targetElement.ownerDocument);
    return node === targetElement ? selector : undefined;
  }

  for (let element: Element | null = targetElement; element && element !== root; element = parentElementOrShadowHost(element)) {
    const nodeName = element.nodeName.toLowerCase();

    // Element ID is the strongest signal, use it.
    let bestTokenForLevel = '';
    if (element.id) {
      // Simple ids use the '#id' form; anything else goes into a quoted
      // attribute selector with '\' and '"' escaped so the string stays valid.
      const token = /^[a-zA-Z][a-zA-Z0-9\-\_]+$/.test(element.id)
        ? '#' + element.id
        : `[id="${element.id.replace(/[\\"]/g, '\\$&')}"]`;
      const selector = uniqueCSSSelector(token);
      if (selector)
        return selector;
      bestTokenForLevel = token;
    }

    const parent = element.parentNode as (Element | ShadowRoot | null);

    // Combine class names until unique. Class names are escaped because they
    // may contain characters (':', '/', leading digits, ...) that are not
    // valid in a CSS identifier.
    const classes = Array.from(element.classList, className => CSS.escape(className));
    for (let i = 0; i < classes.length; ++i) {
      const token = '.' + classes.slice(0, i + 1).join('.');
      const selector = uniqueCSSSelector(token);
      if (selector)
        return selector;
      // Even if not unique globally, is this class subset unique within the parent's subtree?
      if (!bestTokenForLevel && parent) {
        const sameClassSiblings = parent.querySelectorAll(token);
        if (sameClassSiblings.length === 1)
          bestTokenForLevel = token;
      }
    }

    // Ordinal is the weakest signal.
    if (parent) {
      const siblings = Array.from(parent.children);
      const sameTagSiblings = siblings.filter(sibling => sibling.nodeName.toLowerCase() === nodeName);
      const token = sameTagSiblings.indexOf(element) === 0 ? nodeName : `${nodeName}:nth-child(${1 + siblings.indexOf(element)})`;
      const selector = uniqueCSSSelector(token);
      if (selector)
        return selector;
      if (!bestTokenForLevel)
        bestTokenForLevel = token;
    } else if (!bestTokenForLevel) {
      bestTokenForLevel = nodeName;
    }
    tokens.unshift(bestTokenForLevel);
  }

  // Never hand `undefined` to callers that expect a string: fall back to the
  // full path even when the target is not its first match.
  return uniqueCSSSelector() || tokens.join(' ');
}
/**
 * Builds the body of a `text=` selector from the direct text-node children of
 * `node`.
 *
 * Returns `null` when the node has no non-blank text child, or more than one.
 * Otherwise returns the text (capped at 30 characters) as a quoted exact match
 * when it is the node's only, untrimmed, uncapped text content, and as a
 * `/.*text.*\/`-style regular expression in every other case.
 */
function textSelectorForElement(node: Node): string | null {
  const maxLength = 30;
  // Sticky: once anything makes an exact match unreliable (surrounding
  // whitespace, truncation, or a blank sibling text node on either side),
  // it stays set regardless of child order.
  let needsRegex = false;
  let trimmedText: string | null = null;
  for (const child of node.childNodes) {
    if (child.nodeType !== Node.TEXT_NODE)
      continue;
    const content = child.textContent || '';
    const trimmed = content.trim();
    if (!trimmed) {
      // Blank text node: the element's text is not exactly the trimmed text.
      needsRegex = true;
      continue;
    }
    // A second non-blank text node makes the text ambiguous.
    if (trimmedText)
      return null;
    trimmedText = trimmed.substring(0, maxLength);
    if (content !== trimmedText)
      needsRegex = true;
  }
  if (!trimmedText)
    return null;
  return needsRegex ? `/.*${escapeForRegex(trimmedText)}.*/` : `"${trimmedText}"`;
}
/**
 * Backslash-escapes every regular-expression metacharacter in `text` so the
 * result can be embedded in a pattern and match the text literally.
 */
function escapeForRegex(text: string): string {
  const metacharacters = /[.*+?^${}()|[\]\\]/g;
  return text.replace(metacharacters, matched => '\\' + matched);
}
/**
 * Wraps `text` in double quotes for use as a quoted value inside a selector,
 * escaping both backslashes and double quotes with a backslash. Escaping the
 * backslash itself matters: otherwise a value ending in `\` would swallow the
 * closing quote, and a literal `\"` in the value would change meaning.
 */
function quoteString(text: string): string {
  return `"${text.replace(/[\\"]/g, '\\$&')}"`;
}
// One step of a generated selector chain: a selector engine name plus the
// body to hand to that engine.
type SelectorToken = {
// Engine name. The generators in this file emit 'css' and 'text' tokens.
engine: string;
// Selector body without the `engine=` prefix; joinSelector adds the prefix
// for non-css engines.
selector: string;
};
/**
 * Serializes a chain of tokens into one selector string.
 *
 * Adjacent css tokens are joined with a single space (descendant combinator);
 * any other adjacency is joined with ` >> `. Non-css tokens are written as
 * `engine=selector`.
 */
function joinSelector(path: SelectorToken[]): string {
  const parts: string[] = [];
  let previousEngine = '';
  path.forEach(({ engine, selector }, index) => {
    const bothCss = previousEngine === 'css' && engine === 'css';
    if (index > 0 && !bothCss)
      parts.push('>>');
    parts.push(engine === 'css' ? selector : `${engine}=${selector}`);
    previousEngine = engine;
  });
  return parts.join(' ');
}
/**
 * Heuristic check for ids that look machine-generated.
 *
 * Counts how often the character class (lowercase, uppercase, digit, other)
 * changes along the id, ignoring `-` and `_` and not counting an uppercase
 * letter followed by a lowercase one (camelCase word start). The id is
 * considered guid-like when there is at least one change per four characters
 * of its full length.
 */
function isGuidLike(id: string): boolean {
  type CharacterKind = 'lower' | 'upper' | 'digit' | 'other';
  const kindOf = (c: string): CharacterKind => {
    if (c >= 'a' && c <= 'z')
      return 'lower';
    if (c >= 'A' && c <= 'Z')
      return 'upper';
    if (c >= '0' && c <= '9')
      return 'digit';
    return 'other';
  };

  let previousKind: CharacterKind | undefined;
  let transitions = 0;
  for (const c of id.split('')) {
    // Separators neither count nor reset the previous character class.
    if (c === '-' || c === '_')
      continue;
    const kind = kindOf(c);
    const startsCamelCaseWord = previousKind === 'upper' && kind === 'lower';
    if (!startsCamelCaseWord && previousKind !== undefined && previousKind !== kind)
      ++transitions;
    previousKind = kind;
  }
  return transitions >= id.length / 4;
}
/**
 * Builds an XPath expression that selects `targetElement`, evaluated with
 * `root` as the context node.
 *
 * Walks from the target up through `parentElement` until `root`, creating one
 * step per level from the tag name, a few well-known attributes and (once)
 * the normalized text. After each level it tries the path accumulated so far
 * and returns the first expression whose first result is the target.
 *
 * @param root Context node for evaluation; the walk stops when it is reached.
 * @param targetElement Element to select.
 * @returns The XPath expression, or `undefined` when `root` has no document
 *   or no candidate expression resolved to the target.
 */
function createXPath(root: Node, targetElement: Element): string | undefined {
// Text and attribute values longer than this are matched by prefix only.
const maxTextLength = 80;
// Expressions longer than this may fall back to a positional `(expr)[n]` form.
const minMeaningfulSelectorLegth = 100;
const maybeDocument = root instanceof Document ? root : root.ownerDocument;
if (!maybeDocument)
return;
// Shadows the global `document` for the rest of this function.
const document = maybeDocument;
// Memoizes evaluation results per expression string.
const xpathCache = new Map<string, Element[]>();
// Path steps collected so far, outermost first.
const tokens: string[] = [];
// Evaluates `expression` against `root` and returns the matching elements in
// document order; results are cached.
function evaluateXPath(expression: string): Element[] {
let nodes: Element[] | undefined = xpathCache.get(expression);
if (!nodes) {
nodes = [];
try {
const result = document.evaluate(expression, root, null, XPathResult.ORDERED_NODE_ITERATOR_TYPE);
for (let node = result.iterateNext(); node; node = result.iterateNext()) {
if (node.nodeType === Node.ELEMENT_NODE)
nodes.push(node as Element);
}
} catch (e) {
// Errors thrown during evaluation are swallowed; whatever was collected so
// far (possibly nothing) is cached and returned.
}
xpathCache.set(expression, nodes);
}
return nodes;
}
// Joins `prefix` and the collected tokens into an expression and returns it
// when it resolves to the target, otherwise `undefined`.
function uniqueXPathSelector(prefix?: string): string | undefined {
const path = tokens.slice();
if (prefix)
path.unshift(prefix);
let selector = '//' + path.join('/');
// Empty steps produce '///'; collapsing them turns the gap into a
// descendant ('//') axis.
while (selector.includes('///'))
selector = selector.replace('///', '//');
if (selector.endsWith('/'))
selector = selector.substring(0, selector.length - 1);
const nodes: Element[] = evaluateXPath(selector);
if (nodes[0] === targetElement)
return selector;
// If we are looking at a small set of elements with long selector, fall back to ordinal.
if (nodes.length < 5 && selector.length > minMeaningfulSelectorLegth) {
const index = nodes.indexOf(targetElement);
if (index !== -1)
return `(${selector})[${index + 1}]`;
}
return undefined;
}
// Truncates `text` to maxTextLength and renders it as an XPath string literal.
function escapeAndCap(text: string) {
text = text.substring(0, maxTextLength);
// XPath 1.0 does not support quote escaping.
// 1. If there are no single quotes - use them.
if (text.indexOf(`'`) === -1)
return `'${text}'`;
// 2. If there are no double quotes - use them to enclose text.
if (text.indexOf(`"`) === -1)
return `"${text}"`;
// 3. Otherwise, use popular |concat| trick.
const Q = `'`;
return `concat(${text.split(Q).map(token => Q + token + Q).join(`, "'", `)})`;
}
// Attributes considered on every element.
const defaultAttributes = new Set([ 'title', 'aria-label', 'disabled', 'role' ]);
// Additional attributes considered for specific tags.
const importantAttributes = new Map<string, string[]>([
[ 'form', [ 'action' ] ],
[ 'img', [ 'alt' ] ],
[ 'input', [ 'placeholder', 'type', 'name' ] ],
[ 'textarea', [ 'placeholder', 'type', 'name' ] ],
]);
// Text conditions are emitted for at most one level of the walk.
let usedTextConditions = false;
for (let element: Element | null = targetElement; element && element !== root; element = element.parentElement) {
const nodeName = element.nodeName.toLowerCase();
// svg is matched as '*' plus a local-name() test instead of by tag name.
const tag = nodeName === 'svg' ? '*' : nodeName;
const tagConditions = [];
if (nodeName === 'svg')
tagConditions.push('local-name()="svg"');
const attrConditions: string[] = [];
const importantAttrs = [ ...defaultAttributes, ...(importantAttributes.get(tag) || []) ];
for (const attr of importantAttrs) {
const value = element.getAttribute(attr);
// NOTE(review): the raw attribute value is compared against
// normalize-space(@attr); values with leading, trailing or repeated
// whitespace look like they can never match - confirm.
if (value && value.length < maxTextLength)
attrConditions.push(`normalize-space(@${attr})=${escapeAndCap(value)}`);
else if (value)
attrConditions.push(`starts-with(normalize-space(@${attr}), ${escapeAndCap(value)})`);
}
const text = document.evaluate('normalize-space(.)', element).stringValue;
const textConditions = [];
// `select` elements never get a text condition.
if (tag !== 'select' && text.length && !usedTextConditions) {
if (text.length < maxTextLength)
textConditions.push(`normalize-space(.)=${escapeAndCap(text)}`);
else
textConditions.push(`starts-with(normalize-space(.), ${escapeAndCap(text)})`);
usedTextConditions = true;
}
// Always retain the last tag.
const conditions = [ ...tagConditions, ...textConditions, ...attrConditions ];
// With no conditions, only the innermost level keeps its bare tag; other
// levels contribute an empty step.
const token = conditions.length ? `${tag}[${conditions.join(' and ')}]` : (tokens.length ? '' : tag);
const selector = uniqueXPathSelector(token);
if (selector)
return selector;
const parent = element.parentElement;
// Zero-based position among same-tag siblings; stays -1 when the element is
// the only child with its tag name.
let ordinal = -1;
if (parent) {
const siblings = Array.from(parent.children);
const sameTagSiblings = siblings.filter(sibling => (sibling).nodeName.toLowerCase() === nodeName);
if (sameTagSiblings.length > 1)
ordinal = sameTagSiblings.indexOf(element);
}
// Do not include text into this token, only tag / attributes.
// Topmost node will get all the text.
// NOTE(review): `conditions` below still contains textConditions, so the
// stored token does include text when this level produced it - confirm which
// of the comment or the code is intended.
const conditionsString = conditions.length ? `[${conditions.join(' and ')}]` : '';
// NOTE(review): for svg the step is '*[n][local-name()="svg"]', where [n]
// counts all element children rather than same-tag siblings - confirm.
const ordinalString = ordinal >= 0 ? `[${ordinal + 1}]` : '';
tokens.unshift(`${tag}${ordinalString}${conditionsString}`);
}
return uniqueXPathSelector();
}