feat(selectors): introduce zs selector engine (#214)

This commit is contained in:
Dmitry Gozman 2019-12-12 09:02:37 -08:00 committed by GitHub
parent 856787a8e6
commit 59b0900321
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 983 additions and 5 deletions

View file

@ -3829,6 +3829,7 @@ All methods accepting selector also accept a string shorthand which is equivalen
For convenience, selectors in the wrong format are heuristically converted to the right format: For convenience, selectors in the wrong format are heuristically converted to the right format:
- selector starting with `//` is assumed to be `xpath=selector`; - selector starting with `//` is assumed to be `xpath=selector`;
- selector starting with `"` is assumed to be `zs=selector`;
- otherwise selector is assumed to be `css=selector`. - otherwise selector is assumed to be `css=selector`.
```js ```js
@ -3838,6 +3839,9 @@ const handle = await page.$('css=div');
// queries '//html/body/div' xpath selector // queries '//html/body/div' xpath selector
const handle = await page.$('xpath=//html/body/div'); const handle = await page.$('xpath=//html/body/div');
// queries '"foo"' zs selector
const handle = await page.$('zs="foo"');
// queries 'span' css selector inside the result of '//html/body/div' xpath selector // queries 'span' css selector inside the result of '//html/body/div' xpath selector
const handle = await page.$('xpath=//html/body/div >> css=span'); const handle = await page.$('xpath=//html/body/div >> css=span');
@ -3847,6 +3851,9 @@ const handle = await page.$('div');
// converted to 'xpath=//html/body/div' // converted to 'xpath=//html/body/div'
const handle = await page.$('//html/body/div'); const handle = await page.$('//html/body/div');
// converted to 'zs="foo"'
const handle = await page.$('"foo"');
// queries 'span' css selector inside the div handle // queries 'span' css selector inside the div handle
const handle = await divHandle.$('css=span'); const handle = await divHandle.$('css=span');
``` ```

View file

@ -8,6 +8,7 @@ import * as types from './types';
import * as injectedSource from './generated/injectedSource'; import * as injectedSource from './generated/injectedSource';
import * as cssSelectorEngineSource from './generated/cssSelectorEngineSource'; import * as cssSelectorEngineSource from './generated/cssSelectorEngineSource';
import * as xpathSelectorEngineSource from './generated/xpathSelectorEngineSource'; import * as xpathSelectorEngineSource from './generated/xpathSelectorEngineSource';
import * as zsSelectorEngineSource from './generated/zsSelectorEngineSource';
import { assert, helper, debugError } from './helper'; import { assert, helper, debugError } from './helper';
import Injected from './injected/injected'; import Injected from './injected/injected';
@ -48,7 +49,7 @@ export class DOMWorld {
injected(): Promise<js.JSHandle> { injected(): Promise<js.JSHandle> {
if (!this._injectedPromise) { if (!this._injectedPromise) {
const engineSources = [cssSelectorEngineSource.source, xpathSelectorEngineSource.source]; const engineSources = [cssSelectorEngineSource.source, xpathSelectorEngineSource.source, zsSelectorEngineSource.source];
const source = ` const source = `
new (${injectedSource.source})([ new (${injectedSource.source})([
${engineSources.join(',\n')} ${engineSources.join(',\n')}
@ -431,6 +432,8 @@ function normalizeSelector(selector: string): string {
return selector; return selector;
if (selector.startsWith('//')) if (selector.startsWith('//'))
return 'xpath=' + selector; return 'xpath=' + selector;
if (selector.startsWith('"'))
return 'zs=' + selector;
return 'css=' + selector; return 'css=' + selector;
} }

View file

@ -0,0 +1,777 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.
import { SelectorEngine, SelectorType, SelectorRoot } from './selectorEngine';
// One parsed step of a zs selector.
//   combinator: how the step relates to the previous one:
//     ''  - match anywhere in the subtree of the previous match,
//     '>' - match among the direct children of the previous match,
//     '~' - match in the closest enclosing subtree that contains a match,
//     '^' - go to the parent of the previous match (carries no text/css).
//   text:  JSON-encoded text cue (set when the step was written in quotes).
//   css:   raw css selector (set when the step was not written in quotes).
//   index: zero-based position given with a trailing '#<digits>'.
type Token = {
  combinator: '' | '>' | '~' | '^',
  index?: number,
  text?: string,
  css?: string,
};

// Splits a zs selector into tokens.
//
// Returns the list of tokens on success, or the character position at which
// parsing failed. Parsing fails when:
//   - the selector starts with a combinator ('^', '>' or '~');
//   - '>' or '~' is the last thing in the selector;
//   - a quoted string is not terminated;
//   - a css part is empty (for example "a > > b" or a bare "#1");
//   - '#' is not followed by digits.
// An empty (or all-space) selector yields an empty list.
function tokenize(selector: string): Token[] | number {
  const tokens: Token[] = [];
  let pos = 0;
  const skipWhitespace = () => {
    while (pos < selector.length && selector[pos] === ' ')
      pos++;
  };
  while (pos < selector.length) {
    skipWhitespace();
    if (pos === selector.length)
      break;
    // A selector cannot start with a combinator: there is nothing to combine with.
    if (!tokens.length && '^>~'.includes(selector[pos]))
      return pos;
    const token: Token = { combinator: '' };
    if (selector[pos] === '^') {
      // '^' is a complete token on its own.
      token.combinator = '^';
      tokens.push(token);
      pos++;
      continue;
    }
    if (selector[pos] === '>') {
      token.combinator = '>';
      pos++;
      skipWhitespace();
      if (pos === selector.length)
        return pos;
    } else if (selector[pos] === '~') {
      token.combinator = '~';
      pos++;
      skipWhitespace();
      if (pos === selector.length)
        return pos;
    }
    let text = '';
    let end = pos;
    let stringQuote: string | undefined;
    // A part that starts with a quote is a text cue, anything else is css.
    const isText = '`"\''.includes(selector[pos]);
    while (end < selector.length) {
      if (stringQuote) {
        if (selector[end] === '\\' && end + 1 < selector.length) {
          // Text cues are unescaped here; css keeps the backslash verbatim.
          if (!isText)
            text += selector[end];
          text += selector[end + 1];
          end += 2;
        } else if (selector[end] === stringQuote) {
          text += selector[end++];
          stringQuote = undefined;
          // The closing quote ends a text cue; css may continue after a quoted section.
          if (isText)
            break;
        } else {
          text += selector[end++];
        }
      } else if (' >~^#'.includes(selector[end])) {
        break;
      } else if ('`"\''.includes(selector[end])) {
        stringQuote = selector[end];
        text += selector[end++];
      } else {
        text += selector[end++];
      }
    }
    // Unterminated quoted string.
    if (stringQuote)
      return end;
    if (isText) {
      // Strip the surrounding quotes and normalize to the JSON form.
      token.text = JSON.stringify(text.substring(1, text.length - 1));
    } else {
      // An empty css part cannot match anything and makes the DOM selector
      // APIs throw; report it as a parse error instead.
      if (!text)
        return pos;
      token.css = text;
    }
    pos = end;
    if (pos < selector.length && selector[pos] === '#') {
      pos++;
      let end = pos;
      while (end < selector.length && selector[end] >= '0' && selector[end] <= '9')
        end++;
      if (end === pos)
        return pos;
      const num = Number(selector.substring(pos, end));
      if (isNaN(num))
        return pos;
      token.index = num;
      pos = end;
    }
    tokens.push(token);
  }
  return tokens;
}
// Returns the chain of nodes from |root| down to |targetElement|, both
// included, with |root| first.
//
// Throws 'Target does not belong to the root subtree' when walking up from
// the target runs out of parents, or reaches a parent that is neither the
// root, an element, nor a document fragment.
function pathFromRoot(root: SelectorRoot, targetElement: Element): (Element | SelectorRoot)[] {
  let target: Element | SelectorRoot = targetElement;
  const path: (Element | SelectorRoot)[] = [target];
  while (target !== root) {
    const parent = target.parentNode as (Element | SelectorRoot | null);
    // The root itself is always an acceptable parent, whatever its node type;
    // any other parent must be an element or a document fragment.
    if (!parent || (parent !== root && parent.nodeType !== 1 /* Node.ELEMENT_NODE */ && parent.nodeType !== 11 /* Node.DOCUMENT_FRAGMENT_NODE */))
      throw new Error('Target does not belong to the root subtree');
    target = parent;
    path.push(target);
  }
  path.reverse();
  return path;
}
// Maps a container (parent) to the groups of its immediate children that
// look like items of the same list. One container may hold several lists.
//
// Example:
//   <div>
//     <span class=a><img/><img/></span>
//     <span class=a/>
//     <span class=a/>
//     <br>
//     <div class=b/>
//     <div class=b/>
//     <div class=b/>
//   </div>
//
// This may produce:
//   div  -> [[span, span, span], [div, div, div]]
//   span -> [[img, img]]
type ListsMap = Map<Element | SelectorRoot, Element[][]>;

// Finds list-like groups of siblings below |root|.
//
// Only parents accepted by |shouldConsider| get their children grouped.
// Children are grouped by four fingerprints computed from their tag name,
// subtree size and the box returned by |getBox|; children sharing a
// fingerprint form a list when there are at least two of them. A child is
// assigned to at most one list of its parent.
function detectLists(root: SelectorRoot, shouldConsider: (e: Element | SelectorRoot) => boolean, getBox: (e: Element) => ClientRect): ListsMap {
  // Number of fingerprints per element: structure, size, width+left, height+top.
  const kFingerprints = 4;
  const detected: ListsMap = new Map();

  // Appends |element| to the group stored under |key|, creating it on demand.
  const bucket = (groups: Map<string, Element[]>, element: Element, key: string): void => {
    const existing = groups.get(key);
    if (existing)
      existing.push(element);
    else
      groups.set(key, [element]);
  };

  // Turns every group with two or more not-yet-claimed elements into a list of |parent|.
  const commit = (parent: Element | SelectorRoot, groups: Map<string, Element[]>, claimed: Set<Element>): void => {
    groups.forEach(candidates => {
      const fresh = candidates.filter(item => !claimed.has(item));
      if (fresh.length < 2)
        return;
      const known = detected.get(parent);
      if (known)
        known.push(fresh);
      else
        detected.set(parent, [fresh]);
      for (const item of fresh)
        claimed.add(item);
    });
  };

  // Walks the subtree of |node|. Returns its size in elements and, when
  // |wantFingerprints| is set, the fingerprints of |node| itself.
  const walk = (node: Element | SelectorRoot, wantFingerprints: boolean): { size: number, hashes?: string[] } => {
    const considered = shouldConsider(node);
    const groupsByKind = considered
      ? Array.from({ length: kFingerprints }, () => new Map<string, Element[]>())
      : undefined;
    const tagSequence: string[] | undefined = wantFingerprints ? [node.nodeName] : undefined;
    let subtreeSize = 1;

    let child = node.firstElementChild;
    while (child) {
      const current = child;
      // Children only need fingerprints when this node groups them.
      const sub = walk(current, considered);
      subtreeSize += sub.size;
      if (groupsByKind) {
        sub.hashes!.forEach((hash, kind) => {
          // An empty fingerprint means "do not group by this kind".
          if (hash)
            bucket(groupsByKind[kind], current, hash);
        });
      }
      if (tagSequence)
        tagSequence.push(current.nodeName);
      child = current.nextElementSibling;
    }

    if (groupsByKind) {
      const claimed = new Set<Element>();
      for (const groups of groupsByKind)
        commit(node, groups, claimed);
    }

    if (!wantFingerprints)
      return { size: subtreeSize, hashes: undefined };

    const box = getBox(node as Element);
    const prefix = `${node.nodeName},${(subtreeSize / 3) | 0}`;
    // The structural fingerprint is only used for nodes that are large enough.
    const structural = (tagSequence!.length >= 4) || (subtreeSize >= 10) ? tagSequence!.join('') : '';
    const bySize = `${prefix},${box.height | 0},${box.width | 0}`;
    let byWidthAndLeft = `${prefix},${box.width | 0},${box.left | 0}`;
    let byHeightAndTop = `${prefix},${box.height | 0},${box.top | 0}`;
    if (subtreeSize > 5) {
      // Larger nodes additionally have to agree on the coarse other dimension.
      byWidthAndLeft += `,${2 * Math.log(box.height) | 0}`;
      byHeightAndTop += `,${2 * Math.log(box.width) | 0}`;
    }
    return { size: subtreeSize, hashes: [structural, bySize, byWidthAndLeft, byHeightAndTop] };
  };

  walk(root, false);
  return detected;
}
// One step of a candidate selector built by Engine.create(). Steps are linked
// through |previous| and form a path from the start step to the target.
type Step = {
// Token emitted for this step.
token: Token;
// Element we point at.
element: Element | SelectorRoot;
// Distance between element and (lca between target and element).
depth: number;
// One step score.
score: number;
// Total path score.
totalScore: number;
// Step this one continues from; unset on the start step.
previous?: Step;
// Repeat number for ^ steps.
repeat?: number;
};
// Tuning knobs of the engine.
type Options = {
// Base score of cues that use a tag name.
genericTagScore: number,
// Base score of text cues; a falsy value is treated as 1.
textScore?: number,
// Base score of img[alt=...] cues; a falsy value disables them.
imgAltScore?: number,
// Base score of aria-label cues; a falsy value disables them.
ariaLabelScore?: number,
// Whether sibling list items are detected so that their cues are avoided.
detectLists?: boolean,
// Whether short text cues get a penalty that grows with the distance to the target.
avoidShortText?: boolean,
// Whether placeholders of INPUT elements are used as text cues.
usePlaceholders?: boolean,
// NOTE(review): not read anywhere in this file - confirm whether it is still needed.
debug?: boolean
};
// Options used when Engine is constructed without arguments.
const defaultOptions: Options = {
genericTagScore: 10,
textScore: 1,
imgAltScore: 2,
ariaLabelScore: 2,
detectLists: true,
avoidShortText: false,
usePlaceholders: true,
debug: false,
};
// Kind of evidence a cue is based on.
type CueType = 'text' | 'tag' | 'imgAlt' | 'ariaLabel';
// All elements of one subtree that share the same cue string.
type Cue = {
type: CueType,
score: number,
elements: Element[],
};
// Cue string -> cue.
type CueMap = Map<string, Cue>;
// Measurements cached per element by Engine._elementMetrics().
type ElementMetrics = {
// Bounding client rect of the element.
box: ClientRect,
// Computed style (an empty object when the element has no owner document).
style: CSSStyleDeclaration,
// Derived from font size and weight; smaller for larger or bolder text.
fontMetric: number,
};
// Position of an element relative to the root-to-target path.
type Lca = {
// Index in the path of the deepest path node that contains the element.
lcaDepth: number;
// That path node.
lca: Element | SelectorRoot;
// Set for elements that are off the path; undefined for path nodes.
anchor: Element | SelectorRoot | undefined;
depth: number; // Distance to lca.
};
// Like Cue, but with one list of elements per depth of the root-to-target path.
type PathCue = {
type: CueType,
score: number,
// elements[d] holds the cue elements recorded for path depth |d|.
elements: Element[][],
// How many elements with this cue were recorded per anchor.
anchorCount: Map<Element | SelectorRoot, number>,
};
// Result of Engine._preprocess().
type PreprocessResult = {
pathCues: Map<string, PathCue>,
lcaMap: Map<Element | SelectorRoot, Lca>,
};
// Element -> number of the detected list it belongs to.
type ListIndex = Map<Element | SelectorRoot, number>;
// Returns the parent node of |element| (null when there is none), typed as
// something the engine can keep walking from.
function parentOrRoot(element: Element | SelectorRoot): Element | SelectorRoot | null {
  const { parentNode } = element;
  return parentNode as Element | SelectorRoot;
}
// Implementation of the zs selector engine.
//
// query() resolves a selector: it walks the tokens produced by tokenize() and
// matches each of them against the DOM. create() goes the other way and
// builds the lowest-scoring chain of tokens that leads from a root to a target.
// Both rely on "cues" (text, tag name, img alt, aria-label) collected by
// _preprocess().
class Engine {
  // Cues available inside a subtree, cached per subtree root (see _getCues).
  private _cues = new Map<Element | SelectorRoot, CueMap>();
  // Box, style and font measurements, cached per element (see _elementMetrics).
  private _metrics = new Map<Element, ElementMetrics>();
  readonly options: Options;

  constructor(options: Options = defaultOptions) {
    this.options = options;
  }

  // Returns the elements matched by |selector| under |root|.
  // When |all| is false every token produces at most one match per starting
  // point. Throws when the selector cannot be parsed or has no tokens.
  query(root: SelectorRoot, selector: string, all: boolean): Element[] {
    const tokens = tokenize(selector);
    if (typeof tokens === 'number')
      throw new Error('Cannot parse selector at position ' + tokens);
    if (!tokens.length)
      throw new Error('Empty selector');
    if (!this._cues.has(root)) {
      // Collect the cues of the whole subtree once; narrower scopes are
      // derived from them on demand in _getCues().
      const cueMap: CueMap = new Map();
      const pathCues = this._preprocess(root, [root], Infinity).pathCues;
      for (const [text, cue] of pathCues) {
        cueMap.set(text, {
          type: cue.type,
          score: cue.score,
          elements: cue.elements[0]
        });
      }
      this._cues.set(root, cueMap);
    }

    // Map from the element to the boundary used. We never go outside the boundary when doing '~'.
    let currentStep = new Map<Element | SelectorRoot, Element | SelectorRoot>();
    currentStep.set(root, root);
    for (const token of tokens) {
      const nextStep = new Map<Element | SelectorRoot, Element | SelectorRoot>();
      for (let [element, boundary] of currentStep) {
        let next: (Element | SelectorRoot)[] = [];
        if (token.combinator === '^') {
          // Go one level up, but never above the boundary.
          const parent = element === boundary ? null : parentOrRoot(element);
          next = parent ? [parent] : [];
        } else if (token.combinator === '>') {
          boundary = element;
          next = this._matchChildren(element, token, all);
        } else if (token.combinator === '') {
          boundary = element;
          next = this._matchSubtree(element, token, all);
        } else if (token.combinator === '~') {
          // Widen the scope one ancestor at a time until something matches
          // or the boundary has been tried.
          while (true) {
            next = this._matchSubtree(element, token, all);
            if (next.length) {
              // Further '~' / '^' will not go outside of this boundary, which is
              // a container with both the cue and the target elements inside.
              boundary = element;
              break;
            }
            if (element === boundary)
              break;
            const parent = parentOrRoot(element);
            if (!parent)
              break;
            element = parent;
          }
        }
        for (const nextElement of next) {
          // The first boundary recorded for an element wins.
          if (!nextStep.has(nextElement))
            nextStep.set(nextElement, boundary);
        }
      }
      currentStep = nextStep;
    }
    return Array.from(currentStep.keys()).filter(e => e.nodeType === 1 /* Node.ELEMENT_NODE */) as Element[];
  }

  // Builds a selector that leads from |root| to |target|.
  // With type 'notext' the target's own non-tag cues and all cues from the
  // target's subtree are not used. Returns '' when no chain of steps reaches
  // the target. Throws (from pathFromRoot) when |target| is not under |root|.
  create(root: SelectorRoot, target: Element, type: SelectorType): string {
    const path = pathFromRoot(root, target);
    const maxCueCount = type === 'notext' ? 50 : 10;
    const { pathCues, lcaMap } = this._preprocess(root, path, maxCueCount);
    const lists: ListIndex | undefined = this.options.detectLists ?
      this._buildLists(root, path) : undefined;
    // queue[d] holds the best step found so far that ends at path depth |d|,
    // keyed by the anchor of the step's element (undefined for path nodes).
    const queue: Map<Element | SelectorRoot | undefined, Step>[] = path.map(_ => new Map());
    const startStep: Step = {
      token: { combinator: '' },
      element: root,
      depth: 0,
      score: 0,
      totalScore: 0
    };
    // stepDepth === -1 stands for "no step taken yet".
    for (let stepDepth = -1; stepDepth < path.length; stepDepth++) {
      const stepsMap = stepDepth === -1 ? new Map([[undefined, startStep]]) : queue[stepDepth];
      const ancestorDepth = stepDepth === -1 ? 0 : stepDepth;
      for (const [text, cue] of pathCues) {
        const elements = cue.elements[ancestorDepth];
        for (let index = 0; index < elements.length; index++) {
          const element = elements[index];
          const lca = lcaMap.get(element)!;
          const lcaDepth = lca.lcaDepth;
          // Always go deeper in the tree.
          if (lcaDepth <= stepDepth)
            continue;
          // 'notext' - do not use elements from the target's subtree.
          if (type === 'notext' && lcaDepth === path.length - 1 && lca.depth > 0)
            continue;
          // 'notext' - do not use target's own text.
          if (type === 'notext' && lcaDepth === path.length - 1 && !lca.depth && cue.type !== 'tag')
            continue;
          const targetAnchor = path[lcaDepth + 1];
          if (lists && lca.anchor && targetAnchor && lca.anchor !== targetAnchor) {
            const oldList = lists.get(lca.anchor);
            // Do not use cues from sibling list items (lca.anchor and targetAnchor).
            if (oldList && oldList === lists.get(targetAnchor))
              continue;
          }
          if (cue.type !== 'tag' && !this._isVisible(element))
            continue;
          const distanceToTarget = path.length - stepDepth;
          // Short text can be used more effectively in a smaller scope.
          let shortTextScore = 0;
          if (this.options.avoidShortText && cue.type === 'text')
            shortTextScore = Math.max(0, distanceToTarget - 2 * (text.length - 2));
          const score = (cue.score + shortTextScore) * (
            // Unique cues are heavily favored.
            1 * (index + elements.length * 1000) +
            // Larger text is preferred.
            5 * (cue.type === 'text' ? this._elementMetrics(element).fontMetric : 1) +
            // The closer to the target, the better.
            1 * lca.depth
          );
          for (const [anchor, step] of stepsMap) {
            // This ensures uniqueness when resolving the selector.
            if (anchor && (cue.anchorCount.get(anchor) || 0) > index)
              continue;
            let newStep: Step = {
              token: {
                combinator: stepDepth === -1 ? '' : '~',
                text: cue.type === 'text' ? text : undefined,
                css: cue.type === 'text' ? undefined : text,
                index: index || undefined,
              },
              previous: step,
              depth: lca.depth,
              element,
              score,
              totalScore: step.totalScore + score
            };
            let nextStep = queue[lcaDepth].get(lca.anchor);
            if (!nextStep || nextStep.totalScore > newStep.totalScore)
              queue[lcaDepth].set(lca.anchor, newStep);
            // Try going to the ancestor.
            if (newStep.depth) {
              newStep = {
                token: { combinator: '^' },
                previous: newStep,
                depth: 0,
                element: lca.lca,
                score: 2000 * newStep.depth,
                totalScore: newStep.totalScore + 2000 * newStep.depth,
                repeat: newStep.depth
              };
              nextStep = queue[lcaDepth].get(undefined);
              if (!nextStep || nextStep.totalScore > newStep.totalScore)
                queue[lcaDepth].set(undefined, newStep);
            }
          }
        }
      }
    }
    // Pick the cheapest step that ends at the target's depth.
    let best: Step | undefined;
    for (const [, step] of queue[path.length - 1]) {
      if (!best || step.totalScore < best.totalScore)
        best = step;
    }
    if (!best)
      return '';
    // If the best step points below the target, climb back up with '^' tokens.
    // Tokens are collected last-to-first and reversed at the end.
    const tokens: Token[] = new Array(best.depth).fill({ combinator: '^' });
    while (best && best !== startStep) {
      for (let repeat = best.repeat || 1; repeat; repeat--)
        tokens.push(best.token);
      best = best.previous;
    }
    tokens.reverse();
    return this._serialize(tokens);
  }

  // Score multiplier for a piece of text: higher for text that looks volatile.
  private _textMetric(text: string): number {
    // Text which looks like a float number or counter is most likely volatile.
    if (/^\$?[\d,]+(\.\d+|(\.\d+)?[kKmMbBgG])?$/.test(text))
      return 12;
    const num = Number(text);
    // Large numbers are likely volatile.
    if (!isNaN(num) && (num >= 32 || num < 0))
      return 6;
    return 1;
  }

  // Returns (and caches) the box, computed style and font metric of |element|.
  private _elementMetrics(element: Element): ElementMetrics {
    let metrics = this._metrics.get(element);
    if (!metrics) {
      const style = element.ownerDocument ?
        element.ownerDocument.defaultView!.getComputedStyle(element) :
        ({} as CSSStyleDeclaration);
      const box = element.getBoundingClientRect();
      const fontSize = (parseInt(style.fontSize || '', 10) || 12) / 12; // default 12 px
      const fontWeight = (parseInt(style.fontWeight || '', 10) || 400) / 400; // default normal weight
      let fontMetric = fontSize * (1 + (fontWeight - 1) / 5);
      // Invert, so that larger and bolder text gets a smaller (better) metric.
      fontMetric = 1 / Math.exp(fontMetric - 1);
      metrics = { box, style, fontMetric };
      this._metrics.set(element, metrics);
    }
    return metrics;
  }

  // An element counts as visible when its box is larger than 1x1.
  private _isVisible(element: Element): boolean {
    const metrics = this._elementMetrics(element);
    return metrics.box.width > 1 && metrics.box.height > 1;
  }

  // Walks the subtree of |root| once and collects:
  //   - pathCues: for every cue string, the elements carrying it, bucketed by
  //     the depths of |path| that contain them (at most |maxCueCount| each);
  //   - lcaMap: for every visited node, where it sits relative to |path|.
  // |path| is the chain of nodes from the root to the element of interest.
  private _preprocess(root: SelectorRoot, path: (Element | SelectorRoot)[], maxCueCount: number): PreprocessResult {
    const pathCues = new Map<string, PathCue>();
    const lcaMap = new Map<Element | SelectorRoot, Lca>();
    const textScore = this.options.textScore || 1;

    // Records |element| under the cue |text|. |textValue| is the raw text used
    // to rate volatility; pass '' for cues that are not based on text.
    const appendCue = (text: string, type: CueType, score: number, element: Element, lca: Lca, textValue: string) => {
      let pathCue = pathCues.get(text);
      if (!pathCue) {
        pathCue = { type, score: (textValue ? this._textMetric(textValue) : 1) * score, elements: [], anchorCount: new Map() };
        for (let i = 0; i < path.length; i++)
          pathCue.elements.push([]);
        pathCues.set(text, pathCue);
      }
      // The element is inside every path node from its lca up to the root.
      for (let index = lca.lcaDepth; index >= 0; index--) {
        const elements = pathCue.elements[index];
        if (elements.length < maxCueCount)
          elements.push(element);
      }
      if (lca.anchor)
        pathCue.anchorCount.set(lca.anchor, 1 + (pathCue.anchorCount.get(lca.anchor) || 0));
    };

    // Records the cues that come from the element itself (not from its text nodes).
    const appendElementCues = (element: Element, lca: Lca, detached: boolean) => {
      const nodeName = element.nodeName;
      if (!detached && this.options.usePlaceholders && nodeName === 'INPUT') {
        const placeholder = element.getAttribute('placeholder');
        if (placeholder)
          appendCue(JSON.stringify(placeholder), 'text', textScore, element, lca, placeholder);
      }
      if (!detached && nodeName === 'INPUT' && element.getAttribute('type') === 'button') {
        const value = element.getAttribute('value');
        if (value)
          appendCue(JSON.stringify(value), 'text', textScore, element, lca, value);
      }
      appendCue(nodeName, 'tag', this.options.genericTagScore, element, lca, '');
      if (this.options.imgAltScore && nodeName === 'IMG') {
        const alt = element.getAttribute('alt');
        if (alt)
          appendCue(`img[alt=${JSON.stringify(alt)}]`, 'imgAlt', this.options.imgAltScore, element, lca, alt);
      }
      if (this.options.ariaLabelScore) {
        const ariaLabel = element.getAttribute('aria-label');
        // The cue string is a plain css attribute selector, like the img[alt=...]
        // cue above. It must not be wrapped in quotes: a quoted string would be
        // parsed back as a text cue and would never match this 'ariaLabel' cue.
        if (ariaLabel)
          appendCue(`[aria-label=${JSON.stringify(ariaLabel)}]`, 'ariaLabel', this.options.ariaLabelScore, element, lca, ariaLabel);
      }
    };

    const visit = (element: Element | SelectorRoot, lca: Lca, depth: number) => {
      // Check for elements STYLE, NOSCRIPT, SCRIPT, OPTION and other elements
      // that have |display:none| behavior.
      const detached = !(element as HTMLElement).offsetParent;
      if (element.nodeType === 1 /* Node.ELEMENT_NODE */)
        appendElementCues(element as Element, lca, detached);
      lcaMap.set(element, lca);
      for (let childNode = element.firstChild; childNode; childNode = childNode.nextSibling) {
        // Every non-empty text node of a rendered element is a text cue of that element.
        if (element.nodeType === 1 /* Node.ELEMENT_NODE */ && !detached && childNode.nodeType === 3 /* Node.TEXT_NODE */ && childNode.nodeValue) {
          const textValue = childNode.nodeValue.trim();
          if (textValue)
            appendCue(JSON.stringify(textValue), 'text', textScore, element as Element, lca, textValue);
        }
        if (childNode.nodeType !== 1 /* Node.ELEMENT_NODE */)
          continue;
        const childElement = childNode as Element;
        if (childElement.nodeName.startsWith('<pseudo:'))
          continue;
        if (path[depth + 1] === childElement) {
          // Still on the path: the child is its own lca.
          const childLca = { depth: 0, lca: childElement, lcaDepth: depth + 1, anchor: (undefined as Element | SelectorRoot | undefined) };
          visit(childElement, childLca, depth + 1);
        } else {
          // Off the path: one level further from the lca. The anchor is set
          // when leaving the path and then inherited by all descendants.
          const childLca = { depth: lca.depth + 1, lca: lca.lca, lcaDepth: lca.lcaDepth, anchor: lca.anchor || element };
          visit(childElement, childLca, depth + 1);
        }
      }
    };
    visit(root, { depth: 0, lca: root, lcaDepth: 0, anchor: undefined }, 0);
    return { pathCues: pathCues, lcaMap };
  }

  // Returns the part of |cues| whose elements are contained in |root|.
  private _filterCues(cues: CueMap, root: Element | SelectorRoot): CueMap {
    const result = new Map();
    for (const [text, cue] of cues) {
      const filtered = cue.elements.filter(element => root.contains(element));
      if (!filtered.length)
        continue;
      const newCue: Cue = { type: cue.type, score: cue.score, elements: filtered };
      result.set(text, newCue);
    }
    return result;
  }

  // Detects lists among the children of the |path| nodes and gives every list
  // a distinct number. Returns a map from list item to its list number.
  private _buildLists(root: Element | SelectorRoot, path: (Element | SelectorRoot)[]): ListIndex {
    const pathSet = new Set(path);
    const map = detectLists(root, e => pathSet.has(e), e => this._elementMetrics(e).box);
    const result: ListIndex = new Map();
    let listNumber = 1;
    for (const collection of map.values()) {
      for (const list of collection) {
        for (const child of list)
          result.set(child, listNumber);
        ++listNumber;
      }
    }
    return result;
  }

  // Matches |token| against the direct children of |parent|.
  // A token with an index always yields at most the match at that index.
  private _matchChildren(parent: Element | SelectorRoot, token: Token, all: boolean): Element[] {
    const result: Element[] = [];
    if (token.index !== undefined)
      all = false;
    // Number of matches still to skip before taking one.
    let index = token.index || 0;
    if (token.css !== undefined) {
      for (let child = parent.firstElementChild; child; child = child.nextElementSibling) {
        if (child.matches(token.css) && (all || !index--)) {
          result.push(child);
          if (!all)
            return result;
        }
      }
      return result;
    }
    if (token.text !== undefined) {
      const cue = this._getCues(parent).get(token.text);
      if (!cue || cue.type !== 'text')
        return [];
      for (const element of cue.elements) {
        if (parentOrRoot(element) === parent && (all || !index--)) {
          result.push(element);
          if (!all)
            return result;
        }
      }
      return result;
    }
    throw new Error('Unsupported token');
  }

  // Matches |token| against |root| itself and everything below it.
  // A token with an index always yields at most the match at that index.
  private _matchSubtree(root: Element | SelectorRoot, token: Token, all: boolean): Element[] {
    const result: Element[] = [];
    if (token.index !== undefined)
      all = false;
    // Number of matches still to skip before taking one.
    let index = token.index || 0;
    if (token.css !== undefined) {
      // querySelectorAll() does not include the root, so check it separately.
      if (root.nodeType === 1 /* Node.ELEMENT_NODE */) {
        const rootElement = root as Element;
        if (rootElement.matches(token.css) && (all || !index--)) {
          result.push(rootElement);
          if (!all)
            return result;
        }
      }
      const queried = root.querySelectorAll(token.css);
      if (all)
        result.push(...Array.from(queried));
      else if (queried.length > index)
        result.push(queried.item(index));
      return result;
    }
    if (token.text !== undefined) {
      const texts = this._getCues(root);
      const cue = texts.get(token.text);
      if (!cue || cue.type !== 'text')
        return result;
      if (all)
        return cue.elements;
      if (index < cue.elements.length)
        result.push(cue.elements[index]);
      return result;
    }
    throw new Error('Unsupported token');
  }

  // Returns the cues available inside |element|. They are derived from the
  // cues of the closest ancestor that already has them, and then cached.
  private _getCues(element: Element | SelectorRoot): CueMap {
    if (!this._cues.has(element)) {
      let parent = element;
      while (!this._cues.has(parent))
        parent = parentOrRoot(parent)!;
      this._cues.set(element, this._filterCues(this._cues.get(parent)!, element));
    }
    return this._cues.get(element)!;
  }

  // Turns tokens back into selector text. The '' combinator is written as a
  // space, so the result of the join starts with a space that is dropped.
  private _serialize(tokens: Token[]): string {
    const result = tokens.map(token => (token.combinator === '' ? ' ' : token.combinator) +
      (token.text !== undefined ? token.text : '') +
      (token.css !== undefined ? token.css : '') +
      (token.index !== undefined ? '#' + token.index : '')).join('');
    if (result[0] !== ' ')
      throw new Error('First token is wrong');
    return result.substring(1);
  }
}
// The 'zs' selector engine as exposed to the rest of the injected script.
// Every call works on a fresh Engine, so nothing is cached between calls.
const ZSSelectorEngine: SelectorEngine = {
  name: 'zs',

  // Builds a selector for |element| relative to |root|; |type| defaults to 'default'.
  create(root: SelectorRoot, element: Element, type?: SelectorType): string {
    const engine = new Engine();
    const selectorType = type || 'default';
    return engine.create(root, element, selectorType);
  },

  // Returns the first element matched by |selector|, or undefined when nothing matches.
  query(root: SelectorRoot, selector: string): Element | undefined {
    const engine = new Engine();
    const matches = engine.query(root, selector, false /* all */);
    return matches[0];
  },

  // Returns every element matched by |selector|.
  queryAll(root: SelectorRoot, selector: string): Element[] {
    const engine = new Engine();
    return engine.query(root, selector, true /* all */);
  }
};
// Manual self-check meant to be run in a page: for elements 1500..2000 of
// the document it creates a selector, resolves it again and logs the
// elements for which the round trip did not come back to the same element.
// Elements for which create() or query() throws are not reported.
(ZSSelectorEngine as any).test = () => {
  // Tags that are never checked.
  const skippedTags = new Set(['SCRIPT', 'STYLE', 'NOSCRIPT', 'META', 'LINK', 'OPTION']);
  const sample = Array.from(document.querySelectorAll('*')).slice(1500, 2000);

  // Returns true when the selector created for |candidate| resolves to a different element.
  const roundTripFails = (candidate: Element, position: number): boolean => {
    if (skippedTags.has(candidate.tagName.toUpperCase()))
      return false;
    if (position % 100 === 0)
      console.log(`${position} / ${sample.length}`); // eslint-disable-line no-console
    let target = candidate;
    if (target.nodeName.toLowerCase().startsWith('<pseudo:'))
      target = target.parentElement!;
    // Inside svg, check the enclosing <svg> element instead of its internals.
    while (target && target.namespaceURI && target.namespaceURI.endsWith('svg') && target.nodeName.toLowerCase() !== 'svg')
      target = target.parentElement!;
    try {
      // The outline color shows which phase is running: red - create, green - query.
      document.documentElement!.style!.outline = '1px solid red';
      const selector = new Engine().create(document.documentElement, target, 'default');
      document.documentElement!.style!.outline = '1px solid green';
      const resolved = new Engine().query(document.documentElement, selector, false)[0];
      return target !== resolved;
    } catch (error) {
      return false;
    }
  };

  console.time('test'); // eslint-disable-line no-console
  const failures = sample.filter(roundTripFails);
  console.timeEnd('test'); // eslint-disable-line no-console
  console.log(failures); // eslint-disable-line no-console
};
export default ZSSelectorEngine;

View file

@ -0,0 +1,32 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.
const path = require('path');
const InlineSource = require('./webpack-inline-source-plugin.js');
module.exports = {
entry: path.join(__dirname, 'zsSelectorEngine.ts'),
devtool: 'source-map',
module: {
rules: [
{
test: /\.tsx?$/,
loader: 'ts-loader',
options: {
transpileOnly: true
},
exclude: /node_modules/
}
]
},
resolve: {
extensions: [ '.tsx', '.ts', '.js' ]
},
output: {
filename: 'zsSelectorEngineSource.js',
path: path.resolve(__dirname, '../../lib/injected/generated')
},
plugins: [
new InlineSource(path.join(__dirname, '..', 'generated', 'zsSelectorEngineSource.ts')),
]
};

View file

@ -28,6 +28,7 @@ import * as types from './types';
import { Events } from './events'; import { Events } from './events';
import { BrowserContext, BrowserInterface } from './browserContext'; import { BrowserContext, BrowserInterface } from './browserContext';
import { ConsoleMessage, ConsoleMessageLocation } from './console'; import { ConsoleMessage, ConsoleMessageLocation } from './console';
import Injected from './injected/injected';
export interface PageDelegate { export interface PageDelegate {
readonly rawMouse: input.RawMouse; readonly rawMouse: input.RawMouse;
@ -188,6 +189,13 @@ export class Page extends EventEmitter {
return this.mainFrame().$(selector); return this.mainFrame().$(selector);
} }
// Asks the selector engine registered as |name| to create a selector for the
// element behind |handle|, relative to the document element of the main frame.
// The callback below is passed to evaluate() of the main world's context
// together with the injected script handle, the element handle and the name.
// NOTE(review): engines.get(name) is used without a check - presumably an
// unknown engine name makes the callback throw; confirm against callers.
async _createSelector(name: string, handle: dom.ElementHandle<Element>): Promise<string> {
const mainWorld = await this.mainFrame()._mainDOMWorld();
return mainWorld.context.evaluate((injected: Injected, target: Element, name: string) => {
return injected.engines.get(name).create(document.documentElement, target);
}, await mainWorld.injected(), handle, name);
}
evaluateHandle: types.EvaluateHandle = async (pageFunction, ...args) => { evaluateHandle: types.EvaluateHandle = async (pageFunction, ...args) => {
const context = await this.mainFrame().executionContext(); const context = await this.mainFrame().executionContext();
return context.evaluateHandle(pageFunction, ...args as any); return context.evaluateHandle(pageFunction, ...args as any);

View file

@ -26,6 +26,11 @@ module.exports.addTests = function({testRunner, expect, product, FFOX, CHROME, W
const idAttribute = await page.$eval('css=section', e => e.id); const idAttribute = await page.$eval('css=section', e => e.id);
expect(idAttribute).toBe('testAttribute'); expect(idAttribute).toBe('testAttribute');
}); });
it('should work with zs selector', async({page, server}) => {
await page.setContent('<section id="testAttribute">43543</section>');
const idAttribute = await page.$eval('zs="43543"', e => e.id);
expect(idAttribute).toBe('testAttribute');
});
it('should work with xpath selector', async({page, server}) => { it('should work with xpath selector', async({page, server}) => {
await page.setContent('<section id="testAttribute">43543</section>'); await page.setContent('<section id="testAttribute">43543</section>');
const idAttribute = await page.$eval('xpath=/html/body/section', e => e.id); const idAttribute = await page.$eval('xpath=/html/body/section', e => e.id);
@ -71,8 +76,8 @@ module.exports.addTests = function({testRunner, expect, product, FFOX, CHROME, W
expect(text).toBe('hello world!'); expect(text).toBe('hello world!');
}); });
it('should support >> syntax with different engines', async({page, server}) => { it('should support >> syntax with different engines', async({page, server}) => {
await page.setContent('<section><div>hello</div></section>'); await page.setContent('<section><div><span>hello</span></div></section>');
const text = await page.$eval('xpath=/html/body/section >> css=div', (e, suffix) => e.textContent + suffix, ' world!'); const text = await page.$eval('xpath=/html/body/section >> css=div >> zs="hello"', (e, suffix) => e.textContent + suffix, ' world!');
expect(text).toBe('hello world!'); expect(text).toBe('hello world!');
}); });
it('should support spaces with >> syntax', async({page, server}) => { it('should support spaces with >> syntax', async({page, server}) => {
@ -92,6 +97,8 @@ module.exports.addTests = function({testRunner, expect, product, FFOX, CHROME, W
expect(text3).toBe('Hello from root1'); expect(text3).toBe('Hello from root1');
const text4 = await page.$eval('xpath=/html/body/section/div >> css=div >> css=span', e => e.textContent); const text4 = await page.$eval('xpath=/html/body/section/div >> css=div >> css=span', e => e.textContent);
expect(text4).toBe('Hello from root2'); expect(text4).toBe('Hello from root2');
const text5 = await page.$eval('zs=section div >> css=div >> css=span', e => e.textContent);
expect(text5).toBe('Hello from root2');
}); });
}); });
@ -101,6 +108,11 @@ module.exports.addTests = function({testRunner, expect, product, FFOX, CHROME, W
const divsCount = await page.$$eval('css=div', divs => divs.length); const divsCount = await page.$$eval('css=div', divs => divs.length);
expect(divsCount).toBe(3); expect(divsCount).toBe(3);
}); });
it('should work with zs selector', async({page, server}) => {
await page.setContent('<div>hello</div><div>beautiful</div><div>world!</div>');
const divsCount = await page.$$eval('zs=div', divs => divs.length);
expect(divsCount).toBe(3);
});
it('should work with xpath selector', async({page, server}) => { it('should work with xpath selector', async({page, server}) => {
await page.setContent('<div>hello</div><div>beautiful</div><div>world!</div>'); await page.setContent('<div>hello</div><div>beautiful</div><div>world!</div>');
const divsCount = await page.$$eval('xpath=/html/body/div', divs => divs.length); const divsCount = await page.$$eval('xpath=/html/body/div', divs => divs.length);
@ -130,12 +142,17 @@ module.exports.addTests = function({testRunner, expect, product, FFOX, CHROME, W
}); });
describe('Page.$', function() { describe('Page.$', function() {
it('should query existing element', async({page, server}) => { it('should query existing element with css selector', async({page, server}) => {
await page.setContent('<section>test</section>'); await page.setContent('<section>test</section>');
const element = await page.$('css=section'); const element = await page.$('css=section');
expect(element).toBeTruthy(); expect(element).toBeTruthy();
}); });
it('should query existing element with xpath', async({page, server}) => { it('should query existing element with zs selector', async({page, server}) => {
await page.setContent('<section>test</section>');
const element = await page.$('zs="test"');
expect(element).toBeTruthy();
});
it('should query existing element with xpath selector', async({page, server}) => {
await page.setContent('<section>test</section>'); await page.setContent('<section>test</section>');
const element = await page.$('xpath=/html/body/section'); const element = await page.$('xpath=/html/body/section');
expect(element).toBeTruthy(); expect(element).toBeTruthy();
@ -149,6 +166,11 @@ module.exports.addTests = function({testRunner, expect, product, FFOX, CHROME, W
const element = await page.$('//html/body/section'); const element = await page.$('//html/body/section');
expect(element).toBeTruthy(); expect(element).toBeTruthy();
}); });
// No engine prefix is given: a selector that starts with a double quote is
// expected to be routed to the zs engine and match the element by its text.
it('should auto-detect zs selector', async({page, server}) => {
  await page.setContent('<section>test</section>');
  expect(await page.$('"test"')).toBeTruthy();
});
it('should auto-detect css selector', async({page, server}) => { it('should auto-detect css selector', async({page, server}) => {
await page.setContent('<section>test</section>'); await page.setContent('<section>test</section>');
const element = await page.$('section'); const element = await page.$('section');
@ -223,6 +245,16 @@ module.exports.addTests = function({testRunner, expect, product, FFOX, CHROME, W
expect(content).toBe('A'); expect(content).toBe('A');
}); });
// Chained handle queries with the zs engine: each lookup is scoped to the
// handle returned by the previous one (page -> html -> .second -> .inner).
it('should query existing element with zs selector', async({page, server}) => {
  await page.goto(server.PREFIX + '/playground.html');
  await page.setContent('<html><body><div class="second"><div class="inner">A</div></div></body></html>');
  let scope = page;
  for (const selector of ['zs=html', 'zs=.second', 'zs=.inner'])
    scope = await scope.$(selector);
  // `scope` now holds the innermost handle; read its text inside the page.
  const text = await page.evaluate(node => node.textContent, scope);
  expect(text).toBe('A');
});
it('should return null for non-existing element', async({page, server}) => { it('should return null for non-existing element', async({page, server}) => {
await page.setContent('<html><body><div class="second"><div class="inner">B</div></div></body></html>'); await page.setContent('<html><body><div class="second"><div class="inner">B</div></div></body></html>');
const html = await page.$('html'); const html = await page.$('html');
@ -333,4 +365,122 @@ module.exports.addTests = function({testRunner, expect, product, FFOX, CHROME, W
expect(second).toEqual([]); expect(second).toEqual([]);
}); });
}); });
describe('zselector', () => {
// Exercises zs selector queries: text ("..."), attribute and tag matchers,
// the `>`, descendant, `^` and `~` combinators, and `#N` ordinals, against a
// series of small fixtures. Expected values are the outerHTML of the match(es).
it('query', async ({page}) => {
  // Page-side callbacks shared by all assertions.
  const toHTML = element => element.outerHTML;
  const toJoinedHTML = elements => elements.map(element => element.outerHTML).join('\n');
  // Resolve a selector to the first match / to every match.
  const first = selector => page.$eval(selector, toHTML);
  const all = selector => page.$$eval(selector, toJoinedHTML);

  // Text matcher picks the div with exactly that text.
  await page.setContent(`<div>yo</div><div>ya</div><div>ye</div>`);
  expect(await first(`zs="ya"`)).toBe('<div>ya</div>');

  // Attribute matcher with a quoted value containing a space.
  await page.setContent(`<div foo="baz"></div><div foo="bar space"></div>`);
  expect(await first(`zs=[foo="bar space"]`)).toBe('<div foo="bar space"></div>');

  // Single div wrapping a span: selectors that should land on the span,
  // followed by selectors that should land on the enclosing div.
  await page.setContent(`<div>yo<span></span></div>`);
  const bareSpan = '<span></span>';
  const divWithSpan = '<div>yo<span></span></div>';
  for (const selector of [`zs=span`, `zs=div > span`, `zs=div span`, `zs="yo" > span`, `zs="yo" span`])
    expect(await first(selector)).toBe(bareSpan);
  for (const selector of [`zs=span ^`, `zs=span ~ div`, `zs=span ~ "yo"`])
    expect(await first(selector)).toBe(divWithSpan);

  // Two divs with the same text: `#N` ordinals disambiguate between them.
  await page.setContent(`<div>yo</div><div>yo<span></span></div>`);
  const plainDiv = '<div>yo</div>';
  const ordinalCases = [
    [`zs="yo"#0`, plainDiv],
    [`zs="yo"#1`, divWithSpan],
    [`zs="yo" ~ DIV#1`, divWithSpan],
    [`zs=span ~ div#1`, divWithSpan],
    [`zs=span ~ div#0`, divWithSpan],
    [`zs=span ~ "yo"#1 ^ > div`, plainDiv],
    [`zs=span ~ "yo"#1 ^ > div#1`, divWithSpan],
  ];
  for (const [selector, expected] of ordinalCases)
    expect(await first(selector)).toBe(expected);

  // Multiple matches: $eval returns the first one, $$eval returns all of them.
  await page.setContent(`<div>yo<span id="s1"></span></div><div>yo<span id="s2"></span><span id="s3"></span></div>`);
  const firstDiv = '<div>yo<span id="s1"></span></div>';
  const secondDiv = '<div>yo<span id="s2"></span><span id="s3"></span></div>';
  const s1 = '<span id="s1"></span>';
  const s2 = '<span id="s2"></span>';
  const s3 = '<span id="s3"></span>';
  expect(await first(`zs="yo"`)).toBe(firstDiv);
  const multiCases = [
    [`zs="yo"`, [firstDiv, secondDiv]],
    [`zs="yo"#1`, [secondDiv]],
    [`zs="yo" ~ span`, [s1, s2, s3]],
    [`zs="yo"#1 ~ span`, [s2, s3]],
    [`zs="yo" ~ span#0`, [s1, s2]],
    [`zs="yo" ~ span#1`, [s2, s3]],
  ];
  for (const [selector, expected] of multiCases)
    expect(await all(selector)).toBe(expected.join('\n'));
});
// Exercises zs selector generation: for an element located via a css
// selector, page._createSelector('zs', handle) must produce the expected
// zs selector string.
it('create', async ({page}) => {
  // Locate the element with css, then ask for its zs selector.
  const zsFor = async css => page._createSelector('zs', await page.$(css));

  // Text is used when available; a repeated text gets an ordinal suffix.
  await page.setContent(`<div>yo</div><div>ya</div><div>ya</div>`);
  expect(await zsFor('div')).toBe('"yo"');
  expect(await zsFor('div:nth-child(2)')).toBe('"ya"');
  expect(await zsFor('div:nth-child(3)')).toBe('"ya"#1');

  // An image with no text is expected to be described by tag and alt attribute.
  await page.setContent(`<img alt="foo bar">`);
  expect(await zsFor('img')).toBe('img[alt="foo bar"]');

  // Text-less spans: one is anchored to the "yo" text, the other gets an ordinal.
  await page.setContent(`<div>yo<span></span></div><span></span>`);
  expect(await zsFor('span')).toBe('"yo"~SPAN');
  expect(await zsFor('span:nth-child(2)')).toBe('SPAN#1');
});
// Checks the generated zs selector for a span whose parent uses different
// positioning / display styles.
it('children of various display parents', async ({page}) => {
  const zsForSpan = async () => page._createSelector('zs', await page.$('span'));

  // Fixed and relative positioned parents: the span is still described by its text.
  await page.setContent(`<body><div style='position: fixed;'><span>yo</span></div></body>`);
  expect(await zsForSpan()).toBe('"yo"');
  await page.setContent(`<div style='position: relative;'><span>yo</span></div>`);
  expect(await zsForSpan()).toBe('"yo"');

  // A "display: none" parent hides the text of all its children, so the
  // generated selector falls back to the tag name.
  await page.setContent(`<div style='display: none;'><span>yo</span></div>`);
  expect(await zsForSpan()).toBe('SPAN');
});
// Checks selector generation and querying on a larger fixture in which the
// same texts ("hey", "hello", "hey2") repeat across several subtrees.
it('boundary', async ({page}) => {
// Fixture layout, as written below:
//  - three top-level "hey" divs;
//  - a subtree starting with "yo" that also holds the only "unique" div;
//  - two subtrees starting with "ya", each ending in a "hey" row followed by
//    a "hello" div carrying id=target / id=target2.
// NOTE(review): `<div>ya<div>` opens a second div instead of closing the
// first one — presumably deliberate, confirm before "fixing" the markup,
// since the expected selectors below depend on the resulting DOM.
await page.setContent(`
<div>hey</div>
<div>hey</div>
<div>hey</div>
<div>
<div>yo</div>
<div>hello</div>
<div>hello</div>
<div>hello</div>
<div>unique</div>
<div>
<div>hey2<span></span><span></span><span></span></div>
<div>hello</div>
</div>
<div>
<div>hey<span></span><span></span><span></span></div>
<div>hello</div>
</div>
</div>
<div>
<div>ya<div>
<div id=first>hello</div>
<div>hello</div>
<div>hello</div>
<div>
<div>hey2<span></span><span></span><span></span></div>
<div>hello</div>
</div>
<div>
<div>hey<span></span><span></span><span></span></div>
<div id=target>hello</div>
</div>
</div>
<div>
<div>ya<div>
<div id=first2>hello</div>
<div>hello</div>
<div>hello</div>
<div>
<div>hey2<span></span><span></span><span></span></div>
<div>hello</div>
</div>
<div>
<div>hey<span></span><span></span><span></span></div>
<div id=target2>hello</div>
</div>
</div>`);
// The selector generated for #target chains three text matchers.
expect(await page._createSelector('zs', await page.$('#target'))).toBe('"ya"~"hey"~"hello"');
// Querying with that generated selector must resolve back to #target.
expect(await page.$eval(`zs="ya"~"hey"~"hello"`, e => e.outerHTML)).toBe('<div id="target">hello</div>');
// "unique" only appears in the "yo" subtree; this query is expected to find
// nothing. The rejection is converted to its message so it can be compared.
expect(await page.$eval(`zs="ya"~"hey"~"unique"`, e => e.outerHTML).catch(e => e.message)).toBe('Error: failed to find element matching selector "zs="ya"~"hey"~"unique""');
// Querying for all matches returns the target of each "ya" subtree, in document order.
expect(await page.$$eval(`zs="ya" ~ "hey" ~ "hello"`, es => es.map(e => e.outerHTML).join('\n'))).toBe('<div id="target">hello</div>\n<div id="target2">hello</div>');
});
});
}; };

View file

@ -7,6 +7,7 @@ const path = require('path');
const files = [ const files = [
path.join('src', 'injected', 'cssSelectorEngine.webpack.config.js'), path.join('src', 'injected', 'cssSelectorEngine.webpack.config.js'),
path.join('src', 'injected', 'xpathSelectorEngine.webpack.config.js'), path.join('src', 'injected', 'xpathSelectorEngine.webpack.config.js'),
path.join('src', 'injected', 'zsSelectorEngine.webpack.config.js'),
path.join('src', 'injected', 'injected.webpack.config.js'), path.join('src', 'injected', 'injected.webpack.config.js'),
]; ];