/*
    SPDX-FileCopyrightText: 2014 Christoph Cullmann
    SPDX-FileCopyrightText: 2020 Jonathan Poelen

    SPDX-License-Identifier: MIT
*/

// NOTE(review): the bare #include directives in this file carry no header name in this
// copy (the <...> part appears to have been lost) - restore them before building.
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

#ifdef HAS_XERCESC

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

using namespace xercesc;

/*
 * Ideas taken from:
 *
 * author : Boris Kolpackov
 * copyright : not copyrighted - public domain
 *
 * This program uses Xerces-C++ SAX2 parser to load a set of schema files
 * and then to validate a set of XML documents against these schemas. To
 * build this program you will need Xerces-C++ 3.0.0 or later. For more
 * information, see:
 *
 * http://www.codesynthesis.com/~boris/blog/2010/03/15/validating-external-schemas-xerces-cxx/
 */

/**
 * Error handler object used during xml schema validation.
 *
 * Every callback (warnings included) sets the failure flag and appends one
 * formatted line to the message string handed to the constructor.
 */
class CustomErrorHandler : public ErrorHandler
{
public:
    /**
     * Constructor
     * @param messages Pointer to the error message string to fill.
     *                 The handler writes to it through a QTextStream.
     */
    CustomErrorHandler(QString *messages)
        : m_messages(messages)
    {
    }

    /**
     * Check global success/fail state.
     * @return True if there was a failure, false otherwise.
     */
    bool failed() const
    {
        return m_failed;
    }

private:
    /**
     * Severity classes for error messages.
     */
    enum severity { s_warning, s_error, s_fatal };

    /**
     * Wrapper for warning exceptions.
     * @param e Exception to handle.
     */
    void warning(const SAXParseException &e) override
    {
        m_failed = true; // be strict, warnings are evil, too!
        handle(e, s_warning);
    }

    /**
     * Wrapper for error exceptions.
     * @param e Exception to handle.
     */
    void error(const SAXParseException &e) override
    {
        m_failed = true;
        handle(e, s_error);
    }

    /**
     * Wrapper for fatal error exceptions.
     * @param e Exception to handle.
     */
    void fatalError(const SAXParseException &e) override
    {
        m_failed = true;
        handle(e, s_fatal);
    }

    /**
     * Reset the error status to "no error".
     * Only the failure flag is cleared; text already written to the message
     * string is left untouched.
     */
    void resetErrors() override
    {
        m_failed = false;
    }

    /**
     * Generic handler for error/warning/fatal error message exceptions.
     *
     * Appends one line of the form "id:line:column warning: text" or
     * "id:line:column error: text" to the message stream. Both s_error and
     * s_fatal are printed with the "error: " prefix.
     *
     * @param e Exception to handle.
     * @param s Enum value encoding the message severity.
     */
    void handle(const SAXParseException &e, severity s)
    {
        // get id to print: the public id when there is one, else the system id
        const XMLCh *xid(e.getPublicId());
        if (!xid)
            xid = e.getSystemId();

        m_messages << QString::fromUtf16(xid) << ":" << e.getLineNumber() << ":" << e.getColumnNumber() << " " << (s == s_warning ? "warning: " : "error: ")
                   << QString::fromUtf16(e.getMessage()) << Qt::endl;
    }

private:
    /**
     * Storage for created error messages in this handler.
     * Stream operating on the string passed to the constructor.
     */
    QTextStream m_messages;

    /**
     * Global error state. True if there was an error, false otherwise.
     */
    bool m_failed = false;
};

/**
 * SAX2 reader configured in its constructor for schema validation against the
 * grammar pool it is given. Problems are collected in \c messages via \c eh.
 */
class CustomXMLValidator : public SAX2XMLReaderImpl
{
public:
    // Text of all problems reported by eh.
    // Declared before eh on purpose: eh is constructed from the address of this member.
    QString messages;
    // Error handler installed on this reader at the end of the constructor.
    CustomErrorHandler eh{&messages};

    /**
     * Constructor
     * @param xsd Grammar pool forwarded to the SAX2XMLReaderImpl base class.
     */
    CustomXMLValidator(XMLGrammarPool *xsd)
        : SAX2XMLReaderImpl(XMLPlatformUtils::fgMemoryManager, xsd)
    {
        // Commonly useful configuration.
        //
        setFeature(XMLUni::fgSAX2CoreNameSpaces, true);
        setFeature(XMLUni::fgSAX2CoreNameSpacePrefixes, true);
        setFeature(XMLUni::fgSAX2CoreValidation, true);

        // Enable validation.
        //
        setFeature(XMLUni::fgXercesSchema, true);
        setFeature(XMLUni::fgXercesSchemaFullChecking, true);
        setFeature(XMLUni::fgXercesValidationErrorAsFatal, true);

        // Use the loaded grammar during parsing.
        //
        setFeature(XMLUni::fgXercesUseCachedGrammarInParse, true);

        // Don't load schemas from any other source (e.g., from XML document's
        // xsi:schemaLocation attributes).
        //
        setFeature(XMLUni::fgXercesLoadSchema, false);

        // Xerces-C++ 3.1.0 is the first version with working multi import
        // support.
        //
        setFeature(XMLUni::fgXercesHandleMultipleImports, true);

        setErrorHandler(&eh);
    }
};

#endif

#include "../lib/worddelimiters_p.h"
#include "../lib/xml_p.h"

#include

using KSyntaxHighlighting::WordDelimiters;
using KSyntaxHighlighting::Xml::attrToBool;

using namespace Qt::Literals::StringLiterals;

//! User-defined literal: u"text"_sv yields a QStringView over the \p n UTF-16 code units starting at \p s.
static constexpr QStringView operator""_sv(const char16_t *s, std::size_t n)
{
    return QStringView(s, s + n);
}

namespace
{

//! A major.minor version pair, ordered by major revision first and minor revision second.
struct KateVersion {
    int majorRevision;
    int minorRevision;

    //! Both revisions default to 0.
    KateVersion(int majorRevision = 0, int minorRevision = 0)
        : majorRevision(majorRevision)
        , minorRevision(minorRevision)
    {
    }

    //! \return \c true when this version is strictly older than \p version
    bool operator<(const KateVersion &version) const
    {
        return majorRevision < version.majorRevision || (majorRevision == version.majorRevision && minorRevision < version.minorRevision);
    }
};

//! Collects the content of definition files token by token and reports problems with qWarning().
class HlFilesChecker
{
public:
    //! Start a new definition and make it the current one.
    //!
    //! The definition is stored in m_definitions under \p name, and any keyword list or
    //! context still marked as open is forgotten. \p name and every entry of
    //! \p alternativeNames are registered in m_names; a name that is already registered
    //! produces a warning and marks the run as failed.
    //!
    //! \param verStr kateversion string; must contain a '.' that is not its first character
    //! \param filename file the definition comes from, used in warnings
    //! \param name language name of the definition
    //! \param alternativeNames further names under which the definition is known
    void setDefinition(QStringView verStr, const QString &filename, const QString &name, const QStringList &alternativeNames)
    {
        m_currentDefinition = &*m_definitions.insert(name, Definition{});
        m_currentDefinition->languageName = name;
        m_currentDefinition->filename = filename;
        m_currentDefinition->kateVersionStr = verStr.toString();
        m_currentKeywords = nullptr;
        m_currentContext = nullptr;

        // idx == -1: no dot at all; idx == 0: nothing in front of the dot.
        const auto idx = verStr.indexOf(u'.');
        if (idx <= 0) {
            qWarning() << filename << "invalid kateversion" << verStr;
            m_success = false;
        } else {
            // NOTE(review): the toInt() conversions are not checked for failure here.
            m_currentDefinition->kateVersion = {verStr.sliced(0, idx).toInt(), verStr.sliced(idx + 1).toInt()};
        }

        // Registers one name in m_names (name -> filename) or reports the clash with the
        // file that registered it first. The parameter shadows the outer `name`.
        auto checkName = [this, &filename](char const *nameType, const QString &name) {
            auto it = m_names.find(name);
            if (it != m_names.end()) {
                qWarning() << filename << "duplicate" << nameType << "with" << it.value();
                m_success = false;
            } else {
                m_names.insert(name, filename);
            }
        };
        checkName("name", name);
        for (const auto &alternativeName : alternativeNames) {
            checkName("alternative name", alternativeName);
        }
    }

    //! \return the kateversion of the current definition.
    //! Dereferences m_currentDefinition unconditionally, so setDefinition() must have been called before.
    KateVersion currentVersion() const
    {
        return m_currentDefinition->kateVersion;
    }

    //! Process the token \p xml is currently positioned on.
    //! Start elements, end elements and text are handled; every other token type is ignored.
    void processElement(const QXmlStreamReader &xml)
    {
// What a token means depends on whether a context (m_currentContext) or a keyword list (m_currentKeywords) is currently open.
        switch (xml.tokenType()) {
        case QXmlStreamReader::StartElement:
            if (m_currentContext) {
                // Every element opened while a context is open is parsed as a rule of that context.
                m_currentContext->rules.push_back(Context::Rule{});
                auto &rule = m_currentContext->rules.back();
                m_success = rule.parseElement(m_currentDefinition->filename, xml) && m_success;
                // The context remembers whether at least one of its rules has dynamic == True.
                m_currentContext->hasDynamicRule = m_currentContext->hasDynamicRule || rule.dynamic == XmlBool::True;
            } else if (m_currentKeywords) {
                // Element opened inside a keyword list: start collecting its text content.
                m_inKeywordItem = true;
            } else if (xml.name() == u"context"_sv) {
                processContextElement(xml);
            } else if (xml.name() == u"list"_sv) {
                processListElement(xml);
            } else if (xml.name() == u"keywords"_sv) {
                m_success = m_currentDefinition->parseKeywords(xml) && m_success;
            } else if (xml.name() == u"emptyLine"_sv) {
                m_success = parseEmptyLine(m_currentDefinition->filename, xml) && m_success;
            } else if (xml.name() == u"itemData"_sv) {
                m_success = m_currentDefinition->itemDatas.parseElement(m_currentDefinition->filename, xml) && m_success;
            }
            break;

        case QXmlStreamReader::EndElement:
            if (m_currentContext && xml.name() == u"context"_sv) {
                m_currentContext = nullptr;
            } else if (m_currentKeywords && xml.name() == u"list"_sv) {
                m_currentKeywords = nullptr;
            } else if (m_currentKeywords) {
                // End of an element inside a keyword list: hand over the text collected
                // since its start element, then reset the collection state.
                m_success = m_currentKeywords->items.parseElement(m_currentDefinition->filename, xml, m_textContent) && m_success;
                m_textContent.clear();
                m_inKeywordItem = false;
            }
            break;

        case QXmlStreamReader::EntityReference:
        case QXmlStreamReader::Characters:
            // Text is only of interest inside an element of a keyword list.
            if (m_inKeywordItem) {
                m_textContent += xml.text();
            }
            break;

        default:;
        }
    }

    //! Resolve context attribute and include tag
    //!
    //! For each definition, the lineEndContext, lineEmptyContext and fallthroughContext of
    //! every context and the context of every rule are passed to resolveContextName().
    //! Resolved targets reached this way get isOnlyIncluded cleared; targets of
    //! IncludeRules rules are not touched by that step. A definition without any context is
    //! reported and skipped. resolveIncludeRules() is called once at the end.
    void resolveContexts()
    {
        // NOTE(review): template arguments of the Qt container/iterator types in this
        // function appear to be missing in this copy of the file.
        QMutableMapIterator def(m_definitions);
        while (def.hasNext()) {
            def.next();
            auto &definition = def.value();
            auto &contexts = definition.contexts;

            if (contexts.isEmpty()) {
                qWarning() << definition.filename << "has no context";
                m_success = false;
                continue;
            }

            // Clears isOnlyIncluded on the target, unless the name is flagged as stay or
            // has no resolved context.
            auto markAsUsedContext = [](ContextName &contextName) {
                if (!contextName.stay && contextName.context) {
                    contextName.context->isOnlyIncluded = false;
                }
            };

            QMutableMapIterator contextIt(contexts);
            while (contextIt.hasNext()) {
                contextIt.next();
                auto &context = contextIt.value();
                resolveContextName(definition, context, context.lineEndContext, context.line);
                resolveContextName(definition, context, context.lineEmptyContext, context.line);
                resolveContextName(definition, context, context.fallthroughContext, context.line);
                markAsUsedContext(context.lineEndContext);
                markAsUsedContext(context.lineEmptyContext);
                markAsUsedContext(context.fallthroughContext);
                for (auto &rule : context.rules) {
                    rule.parentContext = &context;
                    resolveContextName(definition, context, rule.context, rule.line);
                    if (rule.type != Context::Rule::Type::IncludeRules) {
                        markAsUsedContext(rule.context);
                    } else if (rule.includeAttrib == XmlBool::True && rule.context.context) {
                        // IncludeRules with includeAttrib == True: flag the included context instead.
                        rule.context.context->referencedWithIncludeAttrib = true;
                    }
                }
            }

            // NOTE(review): the result of find() is dereferenced without comparing it to
            // end(); presumably firstContextName always names an existing context - confirm.
            auto *firstContext = &*definition.contexts.find(definition.firstContextName);
            firstContext->isOnlyIncluded = false;
            definition.firstContext = firstContext;
        }

        resolveIncludeRules();
    }

    //! Check all collected definitions and print a warning for each problem found.
    //! \return \c false when a problem is reported here or m_success was already \c false, otherwise \c true
    bool check() const
    {
        bool success = m_success;

        const auto usedContexts = extractUsedContexts();

        // NOTE(review): template arguments of the Qt containers below appear to be
        // missing in this copy of the file.
        QMap maxVersionByDefinitions;
        QMap unreachableIncludedRules;

        QMapIterator def(m_definitions);
        while (def.hasNext()) {
            def.next();
            const auto &definition = def.value();
            const auto &filename = definition.filename;

            // Anything other than the definition itself coming back from
            // maxKateVersionDefinition() is reported as a kateversion that is too low.
            auto *maxDef = maxKateVersionDefinition(definition, maxVersionByDefinitions);
            if (maxDef != &definition) {
                qWarning() << definition.filename << "depends on a language" << maxDef->languageName << "in version" << maxDef->kateVersionStr
                           << ". Please, increase kateversion.";
                success = false;
            }

            // Both sets are passed to checkContexts(), which presumably fills them - confirm.
            QSet usedAttributeNames;
            QSet ignoredAttributeNames;
            success = checkKeywordsList(definition) && success;
            success = checkContexts(definition, usedAttributeNames, ignoredAttributeNames, usedContexts, unreachableIncludedRules) && success;

            // search for non-existing itemDatas.
            const auto invalidNames = usedAttributeNames - definition.itemDatas.styleNames;
            for (const auto &styleName : invalidNames) {
                qWarning() << filename << "line" << styleName.line << "reference of non-existing itemData attributes:" << styleName.name;
                success = false;
            }

            // search for existing itemDatas, but unusable.
            const auto ignoredNames = ignoredAttributeNames - usedAttributeNames;
            for (const auto &styleName : ignoredNames) {
                // NOTE(review): the message ends after "or " - its tail looks lost in this copy.
                qWarning() << filename << "line" << styleName.line << "attribute" << styleName.name
                           << "is never used. All uses are with lookAhead=true or ";
                success = false;
            }

            // search for unused itemDatas.
// Declared style names that no rule references; names already reported through ignoredNames are left out.
            auto unusedNames = definition.itemDatas.styleNames - usedAttributeNames;
            unusedNames -= ignoredNames;
            for (const auto &styleName : std::as_const(unusedNames)) {
                qWarning() << filename << "line" << styleName.line << "unused itemData:" << styleName.name;
                success = false;
            }
        }

        // Report rules recorded as always unreachable.
        // NOTE(review): template arguments appear to be missing on the iterator and the
        // QSet below in this copy of the file.
        QMutableMapIterator unreachableIncludedRuleIt(unreachableIncludedRules);
        while (unreachableIncludedRuleIt.hasNext()) {
            unreachableIncludedRuleIt.next();
            IncludedRuleUnreachableBy &unreachableRulesBy = unreachableIncludedRuleIt.value();
            if (unreachableRulesBy.alwaysUnreachable) {
                auto *rule = unreachableIncludedRuleIt.key();

                // Only rules whose parent context has isOnlyIncluded set are reported here.
                if (!rule->parentContext->isOnlyIncluded) {
                    continue;
                }

                // remove duplicates rules
                // (keeps the first entry seen for each hiding rule, order preserved)
                QSet rules;
                auto &unreachableBy = unreachableRulesBy.unreachableBy;
                unreachableBy.erase(std::remove_if(unreachableBy.begin(),
                                                   unreachableBy.end(),
                                                   [&](const RuleAndInclude &ruleAndInclude) {
                                                       if (rules.contains(ruleAndInclude.rule)) {
                                                           return true;
                                                       }
                                                       rules.insert(ruleAndInclude.rule);
                                                       return false;
                                                   }),
                                    unreachableBy.end());

                // One "line N [context (file) via line M], " entry per hiding rule; the file
                // part is only added when it differs from the file of the reported rule.
                QString message;
                message.reserve(128);
                for (auto &ruleAndInclude : std::as_const(unreachableBy)) {
                    message += u"line "_sv;
                    message += QString::number(ruleAndInclude.rule->line);
                    message += u" ["_sv;
                    message += ruleAndInclude.rule->parentContext->name;
                    if (rule->filename != ruleAndInclude.rule->filename) {
                        message += u" ("_sv;
                        message += ruleAndInclude.rule->filename;
                        message += u')';
                    }
                    if (ruleAndInclude.includeRules) {
                        message += u" via line "_sv;
                        message += QString::number(ruleAndInclude.includeRules->line);
                    }
                    message += u"], "_sv;
                }
                // drop the trailing ", "
                message.chop(2);

                qWarning() << rule->filename << "line" << rule->line << "no IncludeRule can reach this rule, hidden by" << message;
                success = false;
            }
        }

        return success;
    }

private:
    //! Tri-state boolean for XML attributes. Unspecified is the first enumerator, so a
    //! value-initialized XmlBool is Unspecified.
    enum class XmlBool {
        Unspecified,
        False,
        True,
    };

    struct Context;

    //! Reference to a context as written in an attribute, together with its resolved target.
    struct ContextName {
        QString name; //!< text of the attribute
        int popCount = 0; //!< presumably the number of contexts to pop - confirm where it is set
        bool stay = false; //!< when \c true the target is not marked as used by resolveContexts()

        Context *context = nullptr; //!< resolved target, \c nullptr by default
    };

    //! Helpers to read and validate a single attribute (\c attr) of the current element.
    //!
    //! Each extract/check function first compares attr.name() with the requested name and
    //! returns \c false without doing anything when they differ, so calls can be chained
    //! with \c || until one of them recognises the attribute.
    struct Parser {
        const QString &filename; //!< used as prefix of warnings
        const QXmlStreamReader &xml; //!< used for the line number in warnings
        const QXmlStreamAttribute &attr; //!< attribute being examined
        bool success; //!< running status; set to \c false by the helpers below on an invalid value

        //! Read a string type attribute into \p str, \c success = \c false when the value is empty
        //! \return \c true when attr.name() == attrName, otherwise false
        bool extractString(QString &str, QStringView attrName)
        {
            if (attr.name() != attrName) {
                return false;
            }

            str = attr.value().toString();
            if (str.isEmpty()) {
                qWarning() << filename << "line" << xml.lineNumber() << attrName << "attribute is empty";
                success = false;
            }

            return true;
        }

        //! Read a bool type attribute into \p xmlBool: \c XmlBool::Unspecified for a null value,
        //! otherwise the result of attrToBool(). \c success is never modified.
        //! \return \c true when attr.name() == attrName, otherwise false
        bool extractXmlBool(XmlBool &xmlBool, QStringView attrName)
        {
            if (attr.name() != attrName) {
                return false;
            }

            xmlBool = attr.value().isNull() ? XmlBool::Unspecified : attrToBool(attr.value()) ? XmlBool::True : XmlBool::False;

            return true;
        }

        //! Read an integer type attribute into \p positive, \c success = \c false when the value
        //! is not an integer or is negative (0 is accepted)
        //! \return \c true when attr.name() == attrName, otherwise false
        bool extractPositive(int &positive, QStringView attrName)
        {
            if (attr.name() != attrName) {
                return false;
            }

            bool ok = true;
            positive = attr.value().toInt(&ok);

            if (!ok || positive < 0) {
                qWarning() << filename << "line" << xml.lineNumber() << attrName << "should be a positive integer:" << attr.value();
                success = false;
            }

            return true;
        }

        //! Check a color attribute, \c success = \c false when the value is empty.
        //! Nothing is stored, and the QColor validity test is currently commented out.
        //! \return \c true when attr.name() == attrName, otherwise false
        bool checkColor(QStringView attrName)
        {
            if (attr.name() != attrName) {
                return false;
            }

            const auto value = attr.value();
            if (value.isEmpty() /*|| QColor(value).isValid()*/) {
                qWarning() << filename << "line" << xml.lineNumber() << attrName << "should be a color:" << value;
                success = false;
            }

            return true;
        }

        //! Read a QChar into \p c, \c success = \c false when the value does not have exactly one char;
        //! \p c is then set to \c '_'
        //! \return \c true when attr.name() == attrName, otherwise false
        bool extractChar(QChar &c, QStringView attrName)
        {
            if (attr.name() != attrName) {
                return false;
            }

            if (attr.value().size() == 1) {
                c = attr.value()[0];
            } else {
                c = u'_';
                qWarning() << filename << "line" << xml.lineNumber() << attrName << "must contain exactly one char:" << attr.value();
                success = false;
            }

            return true;
        }

        //! Report the attribute as unknown when no helper recognised it.
        //! \return parsing status when \p isExtracted is \c true, otherwise \c false
        bool checkIfExtracted(bool isExtracted)
        {
            if (isExtracted) {
                return success;
            }

            qWarning() << filename << "line" << xml.lineNumber() << "unknown attribute:" << attr.name();
            return false;
        }
    };

    //! A named keyword list and its content.
    struct Keywords {
        //! Content of a keyword list: plain items and includes.
        struct Items {
            //! Text of one element and the line it was read at.
            //! Hashing and equality only consider \c content; \c line is ignored.
            struct Item {
                QString content;
                int line;

                friend size_t qHash(const Item &item, size_t seed = 0)
                {
                    return qHash(item.content, seed);
                }

                friend bool operator==(const Item &item0, const Item &item1)
                {
                    return item0.content == item1.content;
                }
            };

            // NOTE(review): the element type of both containers appears to be missing in
            // this copy of the file; they are filled with {content, line} pairs below.
            QList keywords;
            QSet includes;

            //! Store the text \p content of the element \p xml is positioned on:
            //! "include" goes to \c includes, "item" to \c keywords, any other element is reported.
            //! An empty \p content is reported as well, but the entry is still stored.
            //! \return \c false when something was reported, otherwise \c true
            bool parseElement(const QString &filename, const QXmlStreamReader &xml, const QString &content)
            {
                bool success = true;

                const int line = xml.lineNumber();

                if (content.isEmpty()) {
                    qWarning() << filename << "line" << line << "is empty:" << xml.name();
                    success = false;
                }

                if (xml.name() == u"include"_sv) {
                    includes.insert({content, line});
                } else if (xml.name() == u"item"_sv) {
                    keywords.append({content, line});
                } else {
                    qWarning() << filename << "line" << line << "invalid element:" << xml.name();
                    success = false;
                }

                return success;
            }
        };

        QString name;
        Items items;
        int line;

        //! Read the attributes of the element \p xml is positioned on. Only "name" is known;
        //! any other attribute is reported as unknown.
        //! \return \c false when an attribute is unknown or "name" is empty, otherwise \c true
        bool parseElement(const QString &filename, const QXmlStreamReader &xml)
        {
            line = xml.lineNumber();

            bool success = true;
            const auto attrs = xml.attributes();
            for (const auto &attr : attrs) {
                // The parser starts from the status reached so far, so an earlier failure is kept.
                Parser parser{filename, xml, attr, success};

                const bool isExtracted = parser.extractString(name, u"name"_sv);

                success = parser.checkIfExtracted(isExtracted);
            }
            return success;
        }
    };

    struct Context {
        struct Rule {
            enum class Type {
                Unknown,
                AnyChar,
                Detect2Chars,
                DetectChar,
DetectIdentifier, DetectSpaces, Float, HlCChar, HlCHex, HlCOct, HlCStringChar, IncludeRules, Int, LineContinue, RangeDetect, RegExpr, StringDetect, WordDetect, keyword, }; Type type{}; bool isDotRegex = false; int line = -1; // commonAttributes QString attribute; ContextName context; QString beginRegion; QString endRegion; int column = -1; XmlBool lookAhead{}; XmlBool firstNonSpace{}; // StringDetect, WordDetect, keyword XmlBool insensitive{}; // DetectChar, StringDetect, RegExpr, keyword XmlBool dynamic{}; // Regex XmlBool minimal{}; // IncludeRule XmlBool includeAttrib{}; // DetectChar, Detect2Chars, LineContinue, RangeDetect QChar char0; // Detect2Chars, RangeDetect QChar char1; // AnyChar, StringDetect, RegExpr, WordDetect, keyword QString string; // RegExpr without .* as suffix QString sanitizedString; // Float, HlCHex, HlCOct, Int, WordDetect, keyword QString additionalDeliminator; QString weakDeliminator; // rules included by IncludeRules (without IncludeRule) QList includedRules; // IncludeRules included by IncludeRules QSet includedIncludeRules; Context const *parentContext = nullptr; QString filename; bool parseElement(const QString &filename, const QXmlStreamReader &xml) { this->filename = filename; line = xml.lineNumber(); using Pair = QPair; static const auto pairs = { Pair{u"AnyChar"_sv, Type::AnyChar}, Pair{u"Detect2Chars"_sv, Type::Detect2Chars}, Pair{u"DetectChar"_sv, Type::DetectChar}, Pair{u"DetectIdentifier"_sv, Type::DetectIdentifier}, Pair{u"DetectSpaces"_sv, Type::DetectSpaces}, Pair{u"Float"_sv, Type::Float}, Pair{u"HlCChar"_sv, Type::HlCChar}, Pair{u"HlCHex"_sv, Type::HlCHex}, Pair{u"HlCOct"_sv, Type::HlCOct}, Pair{u"HlCStringChar"_sv, Type::HlCStringChar}, Pair{u"IncludeRules"_sv, Type::IncludeRules}, Pair{u"Int"_sv, Type::Int}, Pair{u"LineContinue"_sv, Type::LineContinue}, Pair{u"RangeDetect"_sv, Type::RangeDetect}, Pair{u"RegExpr"_sv, Type::RegExpr}, Pair{u"StringDetect"_sv, Type::StringDetect}, Pair{u"WordDetect"_sv, Type::WordDetect}, 
Pair{u"keyword", Type::keyword}, }; for (auto pair : pairs) { if (xml.name() == pair.first) { type = pair.second; bool success = parseAttributes(filename, xml); success = checkMandoryAttributes(filename, xml) && success; if (success && type == Type::RegExpr) { // ., (.) followed by *, +, {1} or nothing static const QRegularExpression isDot(QStringLiteral(R"(^\(?\.(?:[*+][*+?]?|[*+]|\{1\})?\$?$)")); // remove "(?:" and ")" static const QRegularExpression removeParentheses(QStringLiteral(R"(\((?:\?:)?|\))")); // remove parentheses on a copy of string auto reg = QString(string).replace(removeParentheses, QString()); isDotRegex = reg.contains(isDot); // Remove .* and .*$ suffix. static const QRegularExpression allSuffix(QStringLiteral("(? rules; XmlBool dynamic{}; XmlBool fallthrough{}; XmlBool stopEmptyLineContextSwitchLoop{}; bool parseElement(const QString &filename, const QXmlStreamReader &xml) { line = xml.lineNumber(); bool success = true; const auto attrs = xml.attributes(); for (const auto &attr : attrs) { Parser parser{filename, xml, attr, success}; XmlBool noIndentationBasedFolding{}; // clang-format off const bool isExtracted = parser.extractString(name, u"name"_sv) || parser.extractString(attribute, u"attribute"_sv) || parser.extractString(lineEndContext.name, u"lineEndContext"_sv) || parser.extractString(lineEmptyContext.name, u"lineEmptyContext"_sv) || parser.extractString(fallthroughContext.name, u"fallthroughContext"_sv) || parser.extractXmlBool(dynamic, u"dynamic"_sv) || parser.extractXmlBool(fallthrough, u"fallthrough"_sv) || parser.extractXmlBool(stopEmptyLineContextSwitchLoop, u"stopEmptyLineContextSwitchLoop"_sv) || parser.extractXmlBool(noIndentationBasedFolding, u"noIndentationBasedFolding"_sv); // clang-format on success = parser.checkIfExtracted(isExtracted); } if (name.isEmpty()) { qWarning() << filename << "line" << xml.lineNumber() << "missing attribute: name"; success = false; } if (attribute.isEmpty()) { qWarning() << filename << "line" << 
xml.lineNumber() << "missing attribute: attribute"; success = false; } return success; } }; struct ItemDatas { struct Style { QString name; int line; friend size_t qHash(const Style &style, size_t seed = 0) { return qHash(style.name, seed); } friend bool operator==(const Style &style0, const Style &style1) { return style0.name == style1.name; } }; QSet