// User:YFdyh000/DuplicateReferences.js

// 注意:保存之后,你必须清除浏览器缓存才能看到做出的更改。Google Chrome、Firefox、Microsoft Edge、Safari:按住⇧ Shift键并单击工具栏的“刷新”按钮。参阅Help:绕过浏览器缓存以获取更多帮助。

// a fork of https://en.wikipedia.org/wiki/User:Polygnotus/DuplicateReferences.js
// Testpage: https://zh.wikipedia.org/wiki/User:YFdyh000/DuplicateReferencesTest

// <nowiki>
mw.loader.using( ['mediawiki.util', 'ext.gadget.HanAssist'] ).then( ( require ) => {
    const { batchConv } = require( 'ext.gadget.HanAssist' );

    $(document).ready(function () {
        // When true, duplicates are computed on every page load. Set to false to compute
        // only while the panel is expanded, which lowers CPU usage. The panel remembers
        // its collapsed/expanded state automatically.
        const checkAlways = true;

        // Gate for the debug() logger.
        const DEBUG = false;
        // Gate for the warn() logger.
        const WARN = true;

        /**
         * Write a prefixed message to the console, but only while DEBUG is enabled.
         *
         * @param {...*} args Values forwarded to console.log after the script prefix.
         */
        function debug(...args) {
            if (!DEBUG) {
                return;
            }
            console.log('[DuplicateReferences]', ...args);
        }
        /**
         * Write a prefixed notice to the console, but only while WARN is enabled.
         * Output goes through console.log, the same channel debug() uses.
         *
         * @param {...*} args Values forwarded to console.log after the script prefix.
         */
        function warn(...args) {
            if (!WARN) {
                return;
            }
            console.log('[DuplicateReferences]', ...args);
        }

		if (
		    mw.config.get('wgAction') !== 'view' ||
		    mw.config.get('wgDiffNewId') ||
		    mw.config.get('wgDiffOldId') ||
		    (mw.config.get('wgNamespaceNumber') !== 0 && (mw.config.get('wgPageName') !== 'User:Polygnotus/DuplicateReferencesTest') && mw.config.get('wgPageName') !== 'User:YFdyh000/DuplicateReferencesTest')
		) {
			debug("Not the correct page or action, script terminated");
		    return;
		}

        // Interface messages, each given in Simplified (hans) and Traditional (hant) Chinese.
        // The table is passed through HanAssist's batchConv() before being registered;
        // presumably that selects the variant matching the reader's language (confirm
        // against the HanAssist documentation).
        const dupeRefMessages = {
            'dupeRef-toolName': { hans: '文献去重', hant: '文獻去重' },
            'dupeRef-addFail': { hans: '添加模板失败,详见网页控制台(F12)中的信息。', hant: '添加模板失敗,詳見網頁控制台(F12)中的信息。' },
            'dupeRef-working': { hans: '处理中...', hant: '處理中...' },
            'dupeRef-done': { hans: '完成', hant: '完成' },
            'dupeRef-error': { hans: '出错', hant: '出錯' },
            'dupeRef-toolTitle': { hans: '重复的参考文献 ', hant: '重複的參考文獻 ' },
            'dupeRef-toolTitleWithNumber': { hans: '重复的参考文献 ($1/$2) ', hant: '重複的參考文獻 ($1/$2) ' },
            'dupeRef-addAction': { hans: '添加', hant: '添加' },
            'dupeRef-feedbackFalse': { hans: '反馈误报', hant: '反饋誤報' },
            'dupeRef-atRef': { hans: ' 于脚注: ', hant: ' 於腳註:' },
            'dupeRef-show': { hans: '显示', hant: '顯示' },
            'dupeRef-hide': { hans: '隐藏', hant: '隱藏' },
        };
        mw.messages.set( batchConv( dupeRefMessages ) );

        // Record which page the script is running on (printed only when DEBUG is true).
        debug("Page title:", document.title);
        debug("URL:", window.location.href);

        /**
         * Group the elements matched by `selector` by the reference list that contains them
         * and count how many fall into each group.
         *
         * A group is identified by a CSS selector string: normally `#<id>` of the first
         * child of the `.mw-heading` element directly preceding the list (or preceding its
         * `.reflist` / `.notelist` wrapper). Elements whose list yields no identifier are
         * not counted, because an empty selector cannot be queried later.
         *
         * @param {string} selector CSS selector for elements located inside `ol.references`.
         * @returns {Array<{parent: string, count: number}>} Groups sorted by count, largest first.
         */
        function countUniqueReferencesParents(selector) {
            const parentCount = {};

            document.querySelectorAll(selector).forEach(element => {
                let parent = element.closest('ol.references');
                if (!parent) {
                    return;
                }

                // Step out of a wrapping .reflist / .notelist container so that the
                // heading lookup below starts from the outermost element.
                const wrapper = parent.parentElement;
                if (wrapper && (wrapper.classList.contains('reflist') || wrapper.classList.contains('notelist'))) {
                    parent = wrapper;
                }

                let uniqueIdentifier = '';
                const previousSibling = parent.previousElementSibling;
                if (previousSibling && previousSibling.classList.contains('mw-heading')) {
                    const hElement = previousSibling.firstElementChild;
                    if (hElement && hElement.id) {
                        // Heading ids may start with a digit or contain punctuation, which
                        // is not valid in a bare `#id` selector; escape before interpolating.
                        uniqueIdentifier = `#${CSS.escape(hElement.id)}`;
                    }
                }
                // NOTE(review): this tests the list (or its wrapper) itself for the
                // mw-parser-output class; kept as in the original - confirm the intent.
                if (parent.classList.contains('mw-parser-output')) {
                    uniqueIdentifier = `.mw-parser-output>ol.references`;
                }

                if (!uniqueIdentifier) {
                    return;
                }
                parentCount[uniqueIdentifier] = (parentCount[uniqueIdentifier] || 0) + 1;
            });

            return Object.entries(parentCount)
                .map(([parent, count]) => ({ parent, count }))
                .sort((a, b) => b.count - a.count);
        }
        const result = countUniqueReferencesParents(('ol.references .reference-text a'));
        if (result.length === 0) {
            warn("没有找到参考文献章节");
            return;
        }
        hEl = document.querySelector(result[0].parent);
        const containerDiv = hEl.parentElement;
        const referencesList = containerDiv.nextElementSibling.querySelector('ol.references');
        if (!referencesList) {
            warn("references element not found");
            return;
        }
        
        // Inject the stylesheet used by the duplicates table and by the citation
        // highlighting (hover, click and :target states).
        const dupeRefStyle = document.createElement('style');
        dupeRefStyle.textContent = `
            li:target { border: 1px dotted red; padding: 2px; background-color: #ffcccc !important;}
            .duplicate-citation-highlight { background-color: #e1eeff; }
            .duplicate-citation-hover { background-color: #cce0ff; border: 1px dotted blue; }
            .duplicate-citation-clicked { border: 1px dotted red; padding: 2px; background-color: #ffe6e6; }
            .mw-collapsible-toggle { font-weight: normal; float: right; }
            .duplicate-references-table { width: 100%; }
            @media only screen and (max-width: 768px) {
                .duplicate-references-table { display: none; }
            }
            .duplicate-references-table .row {overflow-wrap: anywhere;}
        `;
        document.head.append(dupeRefStyle);

        /**
         * Add a {{Duplicated citations}} template to the current page's wikitext and save it.
         *
         * The template is inserted after any leading hatnote/title templates listed in
         * `templatesToCheck`, with a `reason` listing every duplicated URL and its footnote
         * numbers, plus today's date. Progress is shown by replacing the content of
         * `linkElement`; on success the page is reloaded, on failure an error notification
         * is shown and the error is written to the console.
         *
         * @param {HTMLElement} linkElement Element whose content is replaced by status labels.
         */
        function addDuplicateCitationsTemplate(linkElement) {
            debug("Adding duplicate citations template");
            showLoading(linkElement);
            const pageTitle = mw.config.get('wgPageName');
            const duplicateInfo = getDuplicateInfo();

            // Date part of the current ISO timestamp (YYYY-MM-DD) for the date parameter.
            const isoDate = new Date().toISOString().split('T')[0];
            const dateParam = `|date=${isoDate}`;

            // Templates that must stay above the inserted one (compared case-insensitively).
            const templatesToCheck = [
                '{{short description',
                '{{DISPLAYTITLE',
                '{{Lowercase title',
                '{{Italic title',
                '{{about',
                '{{redirect',
                '{{Distinguish',
                '{{for'
            ].map(template => template.toLowerCase());

            let api;
            // mw.Api is provided by the mediawiki.api module, which the script's initial
            // loader call does not request, so make sure it is available first.
            mw.loader.using('mediawiki.api').then(function () {
                api = new mw.Api();
                return api.get({
                    action: 'query',
                    prop: 'revisions',
                    titles: pageTitle,
                    rvprop: 'content|timestamp',
                    rvslots: 'main',
                    curtimestamp: 1,
                    formatversion: 2
                });
            }).then(function (data) {
                const page = data.query.pages[0];
                const revision = page && page.revisions && page.revisions[0];
                if (!revision) {
                    throw new Error(`No revision found for ${pageTitle}`);
                }
                const content = revision.slots.main.content;

                // Find the line index at which to insert the new template: just after the
                // last leading template from the list, stopping at the first line that is
                // neither a template nor a magic word.
                let insertPosition = 0;
                const lines = content.split('\n');
                for (let i = 0; i < lines.length; i++) {
                    const line = lines[i].trim().toLowerCase();
                    if (templatesToCheck.some(template => line.startsWith(template))) {
                        insertPosition = i + 1;
                    } else if (line && !line.startsWith('{{') && !line.startsWith('__')) {
                        break;
                    }
                }

                // Build the reason text: one bullet per duplicated URL with its footnotes.
                let reason = `[[User:YFdyh000/DuplicateReferences.js|${mw.msg('dupeRef-toolName')}]]检测到:<br>\n`;
                duplicateInfo.forEach((info) => {
                    reason += `* ${info.url} (脚注: ${info.refs.map(r => r.number).join(', ')})<br>\n`;
                });

                lines.splice(insertPosition, 0, `{{Duplicated citations|reason=${reason}${dateParam}}}`);
                const newContent = lines.join('\n');

                const summary = `[[User:YFdyh000/DuplicateReferences.js|${mw.msg('dupeRef-toolName')}]] +{{Duplicated citations|reason=${reason}${dateParam}}}`;

                return api.postWithToken('csrf', {
                    action: 'edit',
                    title: pageTitle,
                    text: newContent,
                    summary: summary,
                    // Let the server detect an edit conflict instead of silently
                    // overwriting a revision saved after the content was fetched.
                    basetimestamp: revision.timestamp,
                    starttimestamp: data.curtimestamp
                });
            }).then(function () {
                showSuccess(linkElement);
                setTimeout(function () {
                    location.reload();
                }, 100); // Reload after 0.1 second
            }).catch(function (error) {
                console.error('Error:', error);
                showError(linkElement);
                mw.notify(`${mw.msg('dupeRef-addFail')}`, {type: 'error'});
            });
        }

        // Replace the element's content with a small superscript label "[ <message> ]"
        // for the given interface message key.
        function setStatusLabel(element, messageKey) {
            const label = mw.msg(messageKey);
            element.innerHTML = `<sup><small>[ ${label} ]</small></sup>`;
        }

        // Show the "working" label while the edit is in progress.
        function showLoading(element) {
            setStatusLabel(element, 'dupeRef-working');
        }

        // Show the "done" label after a successful edit.
        function showSuccess(element) {
            setStatusLabel(element, 'dupeRef-done');
        }

        // Show the "error" label after a failed edit.
        function showError(element) {
            setStatusLabel(element, 'dupeRef-error');
        }

        /**
         * Collect the text of an element, leaving out descendants whose computed style
         * has `display: none` or `visibility: hidden`.
         *
         * Each text node is trimmed; the pieces (text nodes and the results for visible
         * child elements) are joined with single spaces and the result is trimmed.
         *
         * @param {Element} element Root element to read.
         * @returns {string} The visible text.
         */
        function getVisibleText(element) {
            const pieces = [];
            Array.from(element.childNodes).forEach((child) => {
                if (child.nodeType === Node.TEXT_NODE) {
                    pieces.push(child.textContent.trim());
                    return;
                }
                if (child.nodeType !== Node.ELEMENT_NODE) {
                    return;
                }
                const computed = window.getComputedStyle(child);
                if (computed.display === 'none' || computed.visibility === 'hidden') {
                    return;
                }
                pieces.push(getVisibleText(child));
            });
            return pieces.join(' ').trim();
        }

        /**
         * Compute the Levenshtein (edit) distance between two strings, comparing them
         * one UTF-16 code unit at a time.
         *
         * @param {string} a First text.
         * @param {string} b Second text.
         * @returns {number} Minimum number of insertions, deletions and substitutions.
         */
        function calculateLevenshteinDistance(a, b) {
            debug("Comparing:");
            debug("Text 1:", a);
            debug("Text 2:", b);

            if (a.length === 0) return b.length;
            if (b.length === 0) return a.length;

            // Only two rows of the classic table are needed at any time:
            // previousRow[j] is the distance between b[0..i-1) and a[0..j).
            let previousRow = Array.from({ length: a.length + 1 }, (_, column) => column);

            for (let i = 1; i <= b.length; i++) {
                const bChar = b.charAt(i - 1);
                const currentRow = [i];
                for (let j = 1; j <= a.length; j++) {
                    if (bChar === a.charAt(j - 1)) {
                        currentRow[j] = previousRow[j - 1];
                    } else {
                        currentRow[j] = 1 + Math.min(
                            previousRow[j - 1], // substitution
                            currentRow[j - 1],  // insertion
                            previousRow[j]      // deletion
                        );
                    }
                }
                previousRow = currentRow;
            }

            const distance = previousRow[a.length];
            debug("Levenshtein distance:", distance);
            return distance;
        }

        /**
         * Turn an edit distance into a rounded similarity percentage string.
         *
         * @param {number} distance Edit distance between the two texts.
         * @param {number} maxLength Length of the longer text.
         * @returns {string} Similarity such as "87%"; "100%" when both texts are empty.
         */
        function calculateSimilarityPercentage(distance, maxLength) {
            // Two empty texts are identical; without this guard 0 / 0 produces "NaN%".
            const similarity = maxLength > 0
                ? ((maxLength - distance) / maxLength) * 100
                : 100;
            debug("Similarity percentage:", similarity.toFixed(2) + "%");
            return Math.round(similarity) + '%';
        }

        /**
         * Scan the reference list and group footnotes that cite the same URL.
         *
         * For every `<li>` child of `referencesList` the first acceptable link is taken as
         * the footnote's URL. Links are rejected when their caption marks an archive copy,
         * or when the URL points to Wikipedia, an "(identifier)" page, a BBC live page,
         * Google Books or an archive.org ISBN record. A footnote is skipped entirely if a
         * link whose URL lacks "http" is met before an acceptable link is found.
         *
         * @returns {Array<{url: string, refs: Array<{id: string, number: number, text: string, similarity: (Object|undefined)}>}>}
         *     One entry per URL cited by more than one footnote. `number` is the item's
         *     1-based position among the list's children; `similarity` maps the ids of
         *     later footnotes in the group to a percentage string.
         */
        function getDuplicateInfo() {
            debug("Getting duplicate info");

            // Captions used for links to archived copies rather than the cited source.
            const archiveLabels = ['Archived', '存档', '存檔'];

            // True when the link may serve as the footnote's identifying URL.
            const isCandidateLink = (url, linkText) =>
                !archiveLabels.includes(linkText) &&
                !url.includes("wikipedia.org") &&
                !url.includes("_(identifier)") &&                       // Templates like ISBN and ISSN and OCLC and S2CID contain (identifier)
                !url.startsWith("https://www.bbc.co.uk/news/live/") &&  // BBC live articles get frequent updates
                !url.startsWith("https://books.google.com/") &&         // may be 2 different pages of the same book
                !url.startsWith("https://archive.org/details/isbn_");

            const duplicates = [];
            const urlMap = new Map();
            const referenceItems = Array.from(referencesList.children);

            debug("Number of reference items:", referenceItems.length);

            referenceItems.forEach((item, index) => {
                if (item.tagName.toLowerCase() !== 'li') {
                    return;
                }
                const refId = item.id;
                const refNumber = index + 1;
                debug(`Processing reference item ${refNumber} (${refId})`);

                // Get the visible text of the entire reference item
                const refText = getVisibleText(item);
                debug(`  Reference text: ${refText}`);

                // Find the first valid link in the reference
                let validLink = null;
                for (const link of item.querySelectorAll('a')) {
                    const url = link.href;

                    // Skip this reference if the URL doesn't contain 'http'
                    if (!url.includes('http')) {
                        debug(`  Skipping reference ${refNumber} - URL does not contain 'http'`);
                        return; // leaves the forEach callback, i.e. moves on to the next item
                    }

                    if (isCandidateLink(url, link.textContent.trim())) {
                        validLink = link;
                        debug(`  Valid link found: ${url}`);
                        break;
                    }
                }

                if (!validLink) {
                    debug(`  No valid link found in this item`);
                    return;
                }

                const url = validLink.href;
                const entry = {id: refId, number: refNumber, text: refText};
                if (urlMap.has(url)) {
                    urlMap.get(url).push(entry);
                    debug(`  Duplicate found for URL: ${url}`);
                } else {
                    urlMap.set(url, [entry]);
                    debug(`  New URL added to map: ${url}`);
                }
            });

            urlMap.forEach((refs, url) => {
                if (refs.length > 1) {
                    // Calculate Levenshtein distance for each pair of refs
                    for (let i = 0; i < refs.length - 1; i++) {
                        for (let j = i + 1; j < refs.length; j++) {
                            debug(`Comparing references ${refs[i].number} and ${refs[j].number}:`);
                            const distance = calculateLevenshteinDistance(refs[i].text, refs[j].text);
                            const maxLength = Math.max(refs[i].text.length, refs[j].text.length);
                            const similarity = calculateSimilarityPercentage(distance, maxLength);
                            refs[i].similarity = refs[i].similarity || {};
                            refs[i].similarity[refs[j].id] = similarity;
                        }
                    }
                    duplicates.push({url, refs});
                }
            });

            debug("Number of duplicate sets found:", duplicates.length);
            debug("Duplicate sets:", duplicates);
            return duplicates;
        }

        /**
         * Build the table that lists duplicated reference URLs.
         *
         * The first row holds the title, a show/hide toggle link and - unless a
         * `table.box-Duplicated_citations` is already on the page - a link that adds the
         * {{Duplicated citations}} template. Each following row shows one duplicated URL,
         * a false-positive report icon, and links to the footnotes that cite it; every
         * footnote after the first is followed by its similarity to the first. Hovering or
         * clicking a footnote link highlights the cited list items.
         *
         * @param {Array<{url: string, refs: Array<{id: string, number: number, text: string}>}>} duplicateInfo
         *     Duplicate groups as returned by getDuplicateInfo(); may be empty.
         * @returns {HTMLTableElement} The table, not yet attached to the document.
         */
        function createCollapsibleTable(duplicateInfo) {
            // Run `action` on every cited list item of the group that exists in the page.
            const forEachCitation = (refs, action) => {
                refs.forEach(r => {
                    const citationElement = document.getElementById(r.id);
                    if (citationElement) {
                        action(citationElement, r);
                    }
                });
            };

            // Similarity between a group's first footnote and another one. A value already
            // stored on the first footnote (keyed by the other footnote's id) is reused so
            // the edit distance is not computed a second time.
            const similarityToOriginal = (originalRef, ref) => {
                const cached = ref.id && originalRef.similarity
                    ? originalRef.similarity[ref.id]
                    : undefined;
                if (cached) {
                    return cached;
                }
                return calculateSimilarityPercentage(
                    calculateLevenshteinDistance(originalRef.text, ref.text),
                    Math.max(originalRef.text.length, ref.text.length)
                );
            };

            const table = document.createElement('table');
            table.className = 'wikitable mw-collapsible duplicate-references-table';
            table.setAttribute('role', 'presentation');

            const tbody = document.createElement('tbody');
            table.appendChild(tbody);

            const headerRow = document.createElement('tr');
            headerRow.classList.add('tool-title');
            const headerCell = document.createElement('td');
            headerCell.innerHTML = `<strong>${mw.msg('dupeRef-toolTitle')}</strong>`;

            const toggleSpan = document.createElement('span');
            toggleSpan.className = 'mw-collapsible-toggle';
            toggleSpan.innerHTML = `[<a href="#" class="mw-collapsible-text">${mw.msg('dupeRef-hide')}</a>]`;
            headerCell.appendChild(toggleSpan);

            // Offer the "add template" link only when the page does not already show the
            // {{Duplicated citations}} box.
            const duplicatedCitationsTemplate = document.querySelector('table.box-Duplicated_citations');
            if (!duplicatedCitationsTemplate) {
                const addTemplateLink = document.createElement('a');
                addTemplateLink.textContent = `${mw.msg('dupeRef-addAction')}{{duplicated citations}}`;
                addTemplateLink.href = '#';
                addTemplateLink.addEventListener('click', function (e) {
                    e.preventDefault();
                    addDuplicateCitationsTemplate(this);
                });
                headerCell.appendChild(addTemplateLink);
            }
            headerRow.appendChild(headerCell);
            tbody.appendChild(headerRow);

            duplicateInfo.forEach(({url, refs}) => {
                const row = document.createElement('tr');
                row.classList.add('row');
                const cell = document.createElement('td');

                // Icon linking to the talk page where false positives can be reported.
                const reportIcon = document.createElement('a');
                reportIcon.href = `https://zh.wikipedia.org/wiki/User talk:YFdyh000/DuplicateReferences.js`;
                reportIcon.innerHTML = `<img src="https://upload.wikimedia.org/wikipedia/commons/thumb/e/ef/Cross_CSS_Red.svg/15px-Cross_CSS_Red.svg.png" width="15" height="15" alt="${mw.msg('dupeRef-feedbackFalse')}" title="${mw.msg('dupeRef-feedbackFalse')}" />`;
                reportIcon.style.marginRight = '5px';
                cell.appendChild(reportIcon);

                const urlLink = document.createElement('a');
                urlLink.href = url;
                urlLink.textContent = url;
                urlLink.target = "_blank";
                urlLink.rel = "noopener noreferrer";

                cell.appendChild(urlLink);
                cell.appendChild(document.createTextNode(mw.msg('dupeRef-atRef')));

                const originalRef = refs[0];
                refs.forEach((ref, index) => {
                    const link = document.createElement('a');
                    link.href = `#${ref.id}`;
                    link.textContent = ref.number;
                    cell.appendChild(link);

                    // Add similarity information for every footnote after the first
                    if (index > 0) {
                        const similarityInfo = document.createElement('span');
                        similarityInfo.textContent = ` (${similarityToOriginal(originalRef, ref)})`;
                        cell.appendChild(similarityInfo);
                    }

                    // Hover: mark the hovered footnote one way and its duplicates another.
                    link.addEventListener('mouseover', () => {
                        forEachCitation(refs, (citationElement, r) => {
                            citationElement.classList.add(
                                r.id === ref.id ? 'duplicate-citation-hover' : 'duplicate-citation-highlight'
                            );
                        });
                    });
                    link.addEventListener('mouseout', () => {
                        forEachCitation(refs, (citationElement) => {
                            citationElement.classList.remove('duplicate-citation-hover');
                            citationElement.classList.remove('duplicate-citation-highlight');
                        });
                    });

                    // Click: move the "clicked" marker from any previous group to this one.
                    link.addEventListener('click', () => {
                        document.querySelectorAll('.duplicate-citation-clicked').forEach(el => {
                            el.classList.remove('duplicate-citation-clicked');
                        });
                        forEachCitation(refs, (citationElement) => {
                            citationElement.classList.add('duplicate-citation-clicked');
                        });
                    });

                    if (index < refs.length - 1) {
                        cell.appendChild(document.createTextNode(', '));
                    }
                });

                row.appendChild(cell);
                tbody.appendChild(row);
            });

            return table;
        }

        /**
         * Create the duplicates table and insert it after `containerDiv`.
         *
         * Does nothing when a table with result rows already exists. With `noChecking`
         * set, only an empty table (title row and toggle) is inserted and no duplicate
         * detection runs. Otherwise any existing table is removed, duplicates are
         * computed, and a table is inserted only if at least one duplicate group exists.
         *
         * @param {boolean} [noChecking=false] Insert the empty shell without computing.
         */
        function checkDuplicateReferenceLinks(noChecking = false) {
            if (document.querySelector('.duplicate-references-table .row') !== null) {
                debug("Stop recreating element.");
                return;
            }

            // Wire up the show/hide toggle of `table` and apply the state stored in
            // localStorage. Every state change is written back to localStorage.
            function addingCollapsible(table) {
                const stateKey = 'duplicateReferencesTableState';
                const toggleAnchor = table.querySelector('.mw-collapsible-toggle a');
                const bodyRows = $(table).find('tr:not(:first-child)');

                const setTableState = (isCollapsed) => {
                    if (isCollapsed) {
                        bodyRows.hide();
                    } else {
                        bodyRows.show();
                    }
                    toggleAnchor.textContent = mw.msg(isCollapsed ? 'dupeRef-show' : 'dupeRef-hide');
                    localStorage.setItem(stateKey, isCollapsed);
                };

                toggleAnchor.addEventListener('click', (event) => {
                    event.preventDefault();
                    // A table without body rows counts as collapsed.
                    const collapsedNow = bodyRows.length === 0 || bodyRows.is(':hidden');
                    setTableState(!collapsedNow);
                    // Runs the real check if this table is still the empty shell.
                    checkDuplicateReferenceLinks(false);
                });

                // Initialize state from localStorage
                setTableState(localStorage.getItem(stateKey) === 'true');
            }

            if (noChecking) {
                debug("Creating collapsible table only.");
                const emptyTable = createCollapsibleTable([]);
                containerDiv.after(emptyTable);

                addingCollapsible(emptyTable);
                return;
            }

            // Drop any earlier table (e.g. the empty shell) before building a new one.
            document.querySelectorAll('.duplicate-references-table').forEach(stale => stale.remove());

            debug("Checking for duplicate reference links");
            const duplicateInfo = getDuplicateInfo();

            if (duplicateInfo.length === 0) {
                warn("No duplicates found");
                return;
            }

            debug("Duplicates found, creating collapsible table");

            const table = createCollapsibleTable(duplicateInfo);
            containerDiv.after(table);

            // Title shows "<duplicate groups>/<children of the reference list>".
            const totalReferences = Array.from(referencesList.children).length;
            table.querySelector('.tool-title>td>strong').innerText = mw.msg('dupeRef-toolTitleWithNumber', duplicateInfo.length, totalReferences);

            addingCollapsible(table);
        }

        // Start-up: when the panel was left collapsed and checkAlways is off, insert only
        // the empty table shell; otherwise run the duplicate check right away.
        const panelWasCollapsed = localStorage.getItem('duplicateReferencesTableState') === 'true';
        checkDuplicateReferenceLinks(panelWasCollapsed && !checkAlways);
        debug("Script execution completed");
    });
});
// </nowiki>