gooderp18绿色标准版
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

343 line
14KB

  1. # -*- coding: utf-8 -*-
  2. """Utilities for generating, parsing and checking XML/XSD files on top of the lxml.etree module."""
  3. import base64
  4. import contextlib
  5. import logging
  6. import re
  7. import zipfile
  8. from io import BytesIO
  9. import requests
  10. from lxml import etree
  11. from odoo.exceptions import UserError
  12. from odoo.tools.misc import file_open
# Names exported by a star-import of this module.
__all__ = [
    "cleanup_xml_node",
    "load_xsd_files_from_url",
    "validate_xml_from_attachment",
]

# Logger named after this module; used by the helpers below.
_logger = logging.getLogger(__name__)
  19. def remove_control_characters(byte_node):
  20. """
  21. The characters to be escaped are the control characters #x0 to #x1F and #x7F (most of which cannot appear in XML)
  22. [...] XML processors must accept any character in the range specified for Char:
  23. `Char :: = #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]`
  24. source:https://www.w3.org/TR/xml/
  25. """
  26. return re.sub(
  27. '[^'
  28. '\u0009'
  29. '\u000A'
  30. '\u000D'
  31. '\u0020-\uD7FF'
  32. '\uE000-\uFFFD'
  33. '\U00010000-\U0010FFFF'
  34. ']'.encode(),
  35. b'',
  36. byte_node,
  37. )
  38. class odoo_resolver(etree.Resolver):
  39. """Odoo specific file resolver that can be added to the XML Parser.
  40. It will search filenames in the ir.attachments
  41. """
  42. def __init__(self, env, prefix):
  43. super().__init__()
  44. self.env = env
  45. self.prefix = prefix
  46. def resolve(self, url, id, context):
  47. """Search url in ``ir.attachment`` and return the resolved content."""
  48. attachment_name = f'{self.prefix}.{url}' if self.prefix else url
  49. attachment = self.env['ir.attachment'].search([('name', '=', attachment_name)])
  50. if attachment:
  51. return self.resolve_string(attachment.raw, context)
  52. def _validate_xml(env, url, path, xmls):
  53. # Get the XSD data
  54. xsd_attachment = env['ir.attachment']
  55. if path:
  56. with file_open(path, filter_ext=('.xsd',)) as file:
  57. content = file.read()
  58. attachment_vals = {
  59. 'name': path.split('/')[-1],
  60. 'datas': base64.b64encode(content.encode()),
  61. }
  62. xsd_attachment = env['ir.attachment'].create(attachment_vals)
  63. elif url:
  64. xsd_attachment = load_xsd_files_from_url(env, url)
  65. # Validate the XML against the XSD
  66. if not isinstance(xmls, list):
  67. xmls = [xmls]
  68. for xml in xmls:
  69. validate_xml_from_attachment(env, xml, xsd_attachment.name)
  70. xsd_attachment.unlink()
  71. def _check_with_xsd(tree_or_str, stream, env=None, prefix=None):
  72. """Check an XML against an XSD schema.
  73. This will raise a UserError if the XML file is not valid according to the
  74. XSD file.
  75. :param str | etree._Element tree_or_str: representation of the tree to be checked
  76. :param io.IOBase | str stream: the byte stream used to build the XSD schema.
  77. If env is given, it can also be the name of an attachment in the filestore
  78. :param odoo.api.Environment env: If it is given, it enables resolving the
  79. imports of the schema in the filestore with ir.attachments.
  80. :param str prefix: if given, provides a prefix to try when
  81. resolving the imports of the schema. e.g. prefix='l10n_cl_edi' will
  82. enable 'SiiTypes_v10.xsd' to be resolved to 'l10n_cl_edi.SiiTypes_v10.xsd'.
  83. """
  84. if not isinstance(tree_or_str, etree._Element):
  85. tree_or_str = etree.fromstring(tree_or_str)
  86. parser = etree.XMLParser()
  87. if env:
  88. parser.resolvers.add(odoo_resolver(env, prefix))
  89. if isinstance(stream, str) and stream.endswith('.xsd'):
  90. attachment = env['ir.attachment'].search([('name', '=', stream)])
  91. if not attachment:
  92. raise FileNotFoundError()
  93. stream = BytesIO(attachment.raw)
  94. xsd_schema = etree.XMLSchema(etree.parse(stream, parser=parser))
  95. try:
  96. xsd_schema.assertValid(tree_or_str)
  97. except etree.DocumentInvalid as xml_errors:
  98. raise UserError('\n'.join(str(e) for e in xml_errors.error_log))
  99. def create_xml_node_chain(first_parent_node, nodes_list, last_node_value=None):
  100. """Generate a hierarchical chain of nodes.
  101. Each new node being the child of the previous one based on the tags contained
  102. in `nodes_list`, under the given node `first_parent_node`.
  103. :param etree._Element first_parent_node: parent of the created tree/chain
  104. :param Iterable[str] nodes_list: tag names to be created
  105. :param str last_node_value: if specified, set the last node's text to this value
  106. :returns: the list of created nodes
  107. :rtype: list[etree._Element]
  108. """
  109. res = []
  110. current_node = first_parent_node
  111. for tag in nodes_list:
  112. current_node = etree.SubElement(current_node, tag)
  113. res.append(current_node)
  114. if last_node_value is not None:
  115. current_node.text = last_node_value
  116. return res
  117. def create_xml_node(parent_node, node_name, node_value=None):
  118. """Create a new node.
  119. :param etree._Element parent_node: parent of the created node
  120. :param str node_name: name of the created node
  121. :param str node_value: value of the created node (optional)
  122. :rtype: etree._Element
  123. """
  124. return create_xml_node_chain(parent_node, [node_name], node_value)[0]
  125. def cleanup_xml_node(xml_node_or_string, remove_blank_text=True, remove_blank_nodes=True, indent_level=0, indent_space=" "):
  126. """Clean up the sub-tree of the provided XML node.
  127. If the provided XML node is of type:
  128. - etree._Element, it is modified in-place.
  129. - string/bytes, it is first parsed into an etree._Element
  130. :param xml_node_or_string (etree._Element, str): XML node (or its string/bytes representation)
  131. :param remove_blank_text (bool): if True, removes whitespace-only text from nodes
  132. :param remove_blank_nodes (bool): if True, removes leaf nodes with no text (iterative, depth-first, done after remove_blank_text)
  133. :param indent_level (int): depth or level of node within root tree (use -1 to leave indentation as-is)
  134. :param indent_space (str): string to use for indentation (use '' to remove all indentation)
  135. :returns (etree._Element): clean node, same instance that was received (if applicable)
  136. """
  137. xml_node = xml_node_or_string
  138. # Convert str/bytes to etree._Element
  139. if isinstance(xml_node, str):
  140. xml_node = xml_node.encode() # misnomer: fromstring actually reads bytes
  141. if isinstance(xml_node, bytes):
  142. parser = etree.XMLParser(recover=True, resolve_entities=False)
  143. xml_node = etree.fromstring(remove_control_characters(xml_node), parser=parser)
  144. # Process leaf nodes iteratively
  145. # Depth-first, so any inner node may become a leaf too (if children are removed)
  146. def leaf_iter(parent_node, node, level):
  147. for child_node in node:
  148. leaf_iter(node, child_node, level if level < 0 else level + 1)
  149. # Indentation
  150. if level >= 0:
  151. indent = '\n' + indent_space * level
  152. if not node.tail or not node.tail.strip():
  153. node.tail = '\n' if parent_node is None else indent
  154. if len(node) > 0:
  155. if not node.text or not node.text.strip():
  156. # First child's indentation is parent's text
  157. node.text = indent + indent_space
  158. last_child = node[-1]
  159. if last_child.tail == indent + indent_space:
  160. # Last child's tail is parent's closing tag indentation
  161. last_child.tail = indent
  162. # Removal condition: node is leaf (not root nor inner node)
  163. if parent_node is not None and len(node) == 0:
  164. if remove_blank_text and node.text is not None and not node.text.strip():
  165. # node.text is None iff node.tag is self-closing (text='' creates closing tag)
  166. node.text = ''
  167. if remove_blank_nodes and not (node.text or ''):
  168. parent_node.remove(node)
  169. leaf_iter(None, xml_node, indent_level)
  170. return xml_node
  171. def load_xsd_files_from_url(env, url, file_name=None, force_reload=False,
  172. request_max_timeout=10, xsd_name_prefix='', xsd_names_filter=None, modify_xsd_content=None):
  173. """Load XSD file or ZIP archive. Save XSD files as ir.attachment.
  174. An XSD attachment is saved as {xsd_name_prefix}.{filename} where the filename is either the filename obtained
  175. from the URL or from the ZIP archive, or the `file_name` param if it is specified and a single XSD is being downloaded.
  176. A typical prefix is the calling module's name.
  177. For ZIP archives, XSD files inside it will be saved as attachments, depending on the provided list of XSD names.
  178. ZIP archive themselves are not saved.
  179. The XSD files content can be modified by providing the `modify_xsd_content` function as argument.
  180. Typically, this is used when XSD files depend on each other (with the schemaLocation attribute),
  181. but it can be used for any purpose.
  182. :param odoo.api.Environment env: environment of calling module
  183. :param str url: URL of XSD file/ZIP archive
  184. :param str file_name: used as attachment name if the URL leads to a single XSD, otherwise ignored
  185. :param bool force_reload: Deprecated.
  186. :param int request_max_timeout: maximum time (in seconds) before the request times out
  187. :param str xsd_name_prefix: if provided, will be added as a prefix to every XSD file name
  188. :param list | str xsd_names_filter: if provided, will only save the XSD files with these names
  189. :param func modify_xsd_content: function that takes the xsd content as argument and returns a modified version of it
  190. :rtype: odoo.api.ir.attachment | bool
  191. :return: every XSD attachment created/fetched or False if an error occurred (see warning logs)
  192. """
  193. try:
  194. _logger.info("Fetching file/archive from given URL: %s", url)
  195. response = requests.get(url, timeout=request_max_timeout)
  196. response.raise_for_status()
  197. except requests.exceptions.HTTPError as error:
  198. _logger.warning('HTTP error: %s with the given URL: %s', error, url)
  199. return False
  200. except requests.exceptions.ConnectionError as error:
  201. _logger.warning('Connection error: %s with the given URL: %s', error, url)
  202. return False
  203. except requests.exceptions.Timeout as error:
  204. _logger.warning('Request timeout: %s with the given URL: %s', error, url)
  205. return False
  206. content = response.content
  207. if not content:
  208. _logger.warning("The HTTP response from %s is empty (no content)", url)
  209. return False
  210. archive = None
  211. with contextlib.suppress(zipfile.BadZipFile):
  212. archive = zipfile.ZipFile(BytesIO(content))
  213. if archive is None:
  214. if modify_xsd_content:
  215. content = modify_xsd_content(content)
  216. if not file_name:
  217. file_name = f"{url.split('/')[-1]}"
  218. _logger.info("XSD name not provided, defaulting to %s", file_name)
  219. prefixed_xsd_name = f"{xsd_name_prefix}.{file_name}" if xsd_name_prefix else file_name
  220. fetched_attachment = env['ir.attachment'].search([('name', '=', prefixed_xsd_name)], limit=1)
  221. if fetched_attachment:
  222. _logger.info("Updating the content of ir.attachment with name: %s", prefixed_xsd_name)
  223. fetched_attachment.raw = content
  224. return fetched_attachment
  225. else:
  226. _logger.info("Saving XSD file as ir.attachment, with name: %s", prefixed_xsd_name)
  227. return env['ir.attachment'].create({
  228. 'name': prefixed_xsd_name,
  229. 'raw': content,
  230. 'public': True,
  231. })
  232. saved_attachments = env['ir.attachment']
  233. for file_path in archive.namelist():
  234. if not file_path.endswith('.xsd'):
  235. continue
  236. file_name = file_path.rsplit('/', 1)[-1]
  237. if xsd_names_filter and file_name not in xsd_names_filter:
  238. _logger.info("Skipping file with name %s in ZIP archive", file_name)
  239. continue
  240. try:
  241. content = archive.read(file_path)
  242. except KeyError:
  243. _logger.warning("Failed to retrieve XSD file with name %s from ZIP archive", file_name)
  244. continue
  245. if modify_xsd_content:
  246. content = modify_xsd_content(content)
  247. prefixed_xsd_name = f"{xsd_name_prefix}.{file_name}" if xsd_name_prefix else file_name
  248. fetched_attachment = env['ir.attachment'].search([('name', '=', prefixed_xsd_name)], limit=1)
  249. if fetched_attachment:
  250. _logger.info("Updating the content of ir.attachment with name: %s", prefixed_xsd_name)
  251. fetched_attachment.raw = content
  252. saved_attachments |= fetched_attachment
  253. else:
  254. _logger.info("Saving XSD file as ir.attachment, with name: %s", prefixed_xsd_name)
  255. saved_attachments |= env['ir.attachment'].create({
  256. 'name': prefixed_xsd_name,
  257. 'raw': content,
  258. 'public': True,
  259. })
  260. return saved_attachments
  261. def validate_xml_from_attachment(env, xml_content, xsd_name, reload_files_function=None, prefix=None):
  262. """Try and validate the XML content with an XSD attachment.
  263. If the XSD attachment cannot be found in database, skip validation without raising.
  264. :param odoo.api.Environment env: environment of calling module
  265. :param xml_content: the XML content to validate
  266. :param xsd_name: the XSD file name in database
  267. :param reload_files_function: Deprecated.
  268. :return: the result of the function :func:`odoo.tools.xml_utils._check_with_xsd`
  269. """
  270. prefixed_xsd_name = f"{prefix}.{xsd_name}" if prefix else xsd_name
  271. try:
  272. _logger.info("Validating with XSD...")
  273. _check_with_xsd(xml_content, prefixed_xsd_name, env, prefix)
  274. _logger.info("XSD validation successful!")
  275. except FileNotFoundError:
  276. _logger.info("XSD file not found, skipping validation")
  277. except etree.XMLSchemaParseError as e:
  278. _logger.error("XSD file not valid: ")
  279. for arg in e.args:
  280. _logger.error(arg)
  281. def find_xml_value(xpath, xml_element, namespaces=None):
  282. element = xml_element.xpath(xpath, namespaces=namespaces)
  283. return element[0].text if element else None
上海开阖软件有限公司 沪ICP备12045867号-1