| #!/usr/bin/env python3 |
| # SPDX-License-Identifier: GPL-2.0 |
| # Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>. |
| # |
| # pylint: disable=C0301,C0302,R0904,R0912,R0913,R0914,R0915,R0917,R1702 |
| |
| """ |
| kdoc_parser |
| =========== |
| |
| Read a C language source or header FILE and extract embedded |
| documentation comments |
| """ |
| |
| import re |
| from pprint import pformat |
| |
| from kdoc_re import NestedMatch, KernRe |
| |
| |
#
# Regular expressions used to parse kernel-doc markups at KernelDoc class.
#
# They are declared in lowercase, outside any class, to make it easier to
# compare this code against the original kernel-doc.pl script.
#
# As they are all evaluated only once, when the module is loaded, there is
# no need to cache them.
#

# Allow whitespace at end of comment start.
doc_start = KernRe(r'^/\*\*\s*$', cache=False)

# End-of-comment marker
doc_end = KernRe(r'\*/', cache=False)
# Comment line leader: optional spaces, '*', optional spaces
doc_com = KernRe(r'\s*\*\s*', cache=False)
# Same leader, but consuming at most one space after the '*'
doc_com_body = KernRe(r'\s*\* ?', cache=False)
# Comment leader followed by a word, captured as group 1
doc_decl = doc_com + KernRe(r'(\w+)', cache=False)

# @params and a strictly limited set of supported section names
# Specifically:
#   Match @word:
#         @...:
#         @{section-name}:
# while trying to not match literal block starts like "example::"
#
doc_sect = doc_com + \
    KernRe(r'\s*(\@[.\w]+|\@\.\.\.|description|context|returns?|notes?|examples?)\s*:([^:].*)?$',
           flags=re.I, cache=False)

# Any comment body line; the text after the leader is group 1
doc_content = doc_com_body + KernRe(r'(.*)', cache=False)
# "DOC:" line; the text after the marker is group 1
doc_block = doc_com + KernRe(r'DOC:\s*(.*)?', cache=False)
# Inline comment markers: "/**" start, "@name: text" line and "*/" end
doc_inline_start = KernRe(r'^\s*/\*\*\s*$', cache=False)
doc_inline_sect = KernRe(r'\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)', cache=False)
doc_inline_end = KernRe(r'^\s*\*/\s*$', cache=False)
# Whole inline comment on one line: "/** @name: text */"
doc_inline_oneline = KernRe(r'^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$', cache=False)
# __attribute__((...)) annotation
attribute = KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)",
                   flags=re.I | re.S, cache=False)

# EXPORT_SYMBOL()/EXPORT_SYMBOL_GPL(); the symbol name is group 2
export_symbol = KernRe(r'^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*', cache=False)
# EXPORT_SYMBOL_NS()/EXPORT_SYMBOL_NS_GPL(); the symbol name is group 2
export_symbol_ns = KernRe(r'^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*"\S+"\)\s*', cache=False)

# "@name" reference, where name may contain ".member" / "->member" parts
# and an optional trailing "..."; the name without the '@' is group 1
type_param = KernRe(r"\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False)
| |
class state:
    """
    Numeric constants used by the parser state machines.

    ``name`` and ``inline_name`` are indexed by the corresponding constant
    and give a printable label for it.
    """

    # Parser states
    (
        NORMAL,                 # normal code
        NAME,                   # looking for function name
        BODY_MAYBE,             # body - or maybe more description
        BODY,                   # the body of the comment
        BODY_WITH_BLANK_LINE,   # the body which has a blank line
        PROTO,                  # scanning prototype
        DOCBLOCK,               # documentation block
        INLINE,                 # gathering doc outside main block
    ) = range(8)

    name = [
        "NORMAL", "NAME", "BODY_MAYBE", "BODY",
        "BODY_WITH_BLANK_LINE", "PROTO", "DOCBLOCK", "INLINE",
    ]

    # Inline documentation state
    (
        INLINE_NA,      # not applicable ($state != INLINE)
        INLINE_NAME,    # looking for member name (@foo:)
        INLINE_TEXT,    # looking for member documentation
        INLINE_END,     # done
        INLINE_ERROR,   # error - comment without header was found; a
                        # warning is emitted as it's not proper
                        # kernel-doc and the rest is ignored
    ) = range(5)

    inline_name = ["", "_NAME", "_TEXT", "_END", "_ERROR"]


# Name of the section that receives text not placed under another section
SECTION_DEFAULT = "Description"
| |
class KernelEntry:
    """
    Data collected while parsing a single kernel-doc comment.

    Besides the attributes created by ``__init__()``, ``dump_section()``
    reads ``self.section`` and ``self.new_start_line``; they are not
    initialized here, so they must be assigned on the instance before
    ``dump_section()`` is called.
    """

    def __init__(self, config, ln):
        """
        Create an empty entry.

        ``config`` must provide a ``log`` object (used by ``emit_msg()``);
        ``ln`` is a line number, stored incremented by one as
        ``declaration_start_line``.
        """

        self.config = config

        self.contents = ""
        self.function = ""
        self.sectcheck = ""
        self.struct_actual = ""
        self.prototype = ""

        self.warnings = []

        self.parameterlist = []
        self.parameterdescs = {}
        self.parametertypes = {}
        self.parameterdesc_start_lines = {}

        self.section_start_lines = {}
        self.sectionlist = []
        self.sections = {}

        self.anon_struct_union = False

        self.leading_space = None

        # State flags
        self.brcount = 0

        self.in_doc_sect = False
        self.declaration_start_line = ln + 1

    # TODO: rename to emit_message after removal of kernel-doc.pl
    def emit_msg(self, log_msg, warning=True):
        """
        Emit an already formatted message.

        Non-warnings are logged right away via ``config.log.info()``.
        Warnings are only queued at ``self.warnings``: their output is
        delegated to the output logic, so that warnings are reported only
        for symbols that are actually output.
        """

        if not warning:
            self.config.log.info(log_msg)
            return

        self.warnings.append(log_msg)

    def dump_section(self, start_new=True):
        """
        Store ``self.contents`` under the name found at ``self.section``.

        Names matching ``type_param`` (and the literal ``@...``) are stored
        as parameter descriptions; any other name is stored as a section.
        A repeated non-empty section has the new contents appended to it
        and, unless it is the default section, a warning is queued.

        When ``start_new`` is true, the entry is then reset to an empty
        default section.
        """

        name = self.section
        contents = self.contents

        if type_param.match(name):
            param = type_param.group(1)
        elif name == "@...":
            param = "..."
        else:
            param = None

        if param is not None:
            self.parameterdescs[param] = contents
            self.parameterdesc_start_lines[param] = self.new_start_line

            self.sectcheck += param + " "
            self.new_start_line = 0

        elif name in self.sections and self.sections[name] != "":
            # Only warn on user-specified duplicate section names
            if name != SECTION_DEFAULT:
                # emit_msg() of this class takes one pre-formatted string,
                # so the line number has to be part of the message itself
                self.emit_msg(f"line {self.new_start_line}: "
                              f"duplicate section name '{name}'\n")
            self.sections[name] += contents

        else:
            self.sections[name] = contents
            self.sectionlist.append(name)
            self.section_start_lines[name] = self.new_start_line
            self.new_start_line = 0

        if start_new:
            self.section = SECTION_DEFAULT
            self.contents = ""
| |
| |
class KernelDoc:
    """
    Read a C language source or header FILE and extract embedded
    documentation comments.
    """

    # Section names

    section_intro = "Introduction"
    section_context = "Context"
    section_return = "Return"

    # Placeholder description stored for parameters, members and enum
    # values which have no description at the kernel-doc comment
    undescribed = "-- undescribed --"
| |
def __init__(self, config, fname):
    """Set up a parser for ``fname`` with both state machines idle."""

    self.config = config
    self.fname = fname

    # Both state machines start at their idle state
    self.state, self.inline_doc_state = state.NORMAL, state.INLINE_NA

    # No entry is being processed yet
    self.entry = None

    # Every potential output is accumulated here
    self.entries = []
| |
def emit_msg(self, ln, msg, warning=True):
    """
    Emit ``msg`` prefixed with ``<file name>:<line number>``.

    While an entry is being processed, the message is handed to it;
    otherwise it goes straight to the configured log, as a warning or
    as an informational message.
    """

    log_msg = f"{self.fname}:{ln} {msg}"

    if not self.entry:
        log = self.config.log
        emit = log.warning if warning else log.info
        emit(log_msg)
        return

    self.entry.emit_msg(log_msg, warning)
| |
def dump_section(self, start_new=True):
    """
    Ask the current entry, if there is one, to store its pending
    section contents.
    """

    entry = self.entry
    if not entry:
        return

    entry.dump_section(start_new)
| |
| # TODO: rename it to store_declaration after removal of kernel-doc.pl |
def output_declaration(self, dtype, name, **args):
    """
    Append ``(name, args)`` to ``self.entries``.

    Nothing is printed nor filtered here: differently from the original
    kernel-doc, output and output filters are handled by a separate
    class. The function name was kept just to make it easier to compare
    the source code of both scripts.
    """

    entry = self.entry
    args.update(declaration_start_line=entry.declaration_start_line,
                type=dtype,
                warnings=entry.warnings)

    # TODO: use collections.OrderedDict to remove sectionlist

    sections = args.get('sections', {})
    sectionlist = args.get('sectionlist', [])

    # Drop empty sections
    # TODO: improve empty sections logic to emit warnings
    for section in ("Description", "Return"):
        if section in sectionlist and not sections[section].rstrip():
            del sections[section]
            sectionlist.remove(section)

    self.entries.append((name, args))

    self.config.log.debug("Output: %s:%s = %s", dtype, name, pformat(args))
| |
def reset_state(self, ln):
    """
    Start a fresh entry for a comment found at line ``ln`` and put both
    state machines back at their idle state.
    """

    self.entry = KernelEntry(self.config, ln)

    # State flags
    self.state, self.inline_doc_state = state.NORMAL, state.INLINE_NA
| |
def push_parameter(self, ln, decl_type, param, dtype,
                   org_arg, declaration_name):
    """
    Record one parameter (or struct/union member) at self.entry.

    The name is normalized first: anything from the first ``[`` or ``)``
    is dropped, and variadic, ``void`` and anonymous struct/union forms
    are mapped to canonical names. A warning is emitted if the resulting
    name has no description. Finally the name is appended to the
    parameter list and ``org_arg`` is stored as its type text.
    """

    entry = self.entry
    descs = entry.parameterdescs
    untyped = dtype == ""

    # Ignore the ending }; from anonymous struct/union
    if entry.anon_struct_union and untyped and param == "}":
        return

    entry.anon_struct_union = False

    param = KernRe(r'[\[\)].*').sub('', param, count=1)

    if untyped and param.endswith("..."):
        if KernRe(r'\w\.\.\.$').search(param):
            # Named variable parameters of the form `x...` lose the dots
            param = param[:-3]
        else:
            # Unnamed variable parameters
            param = "..."

        if not descs.get(param):
            descs[param] = "variable arguments"

    elif untyped and param in ("", "void"):
        param = "void"
        descs[param] = "no arguments"

    elif untyped and param in ("struct", "union"):
        # Unnamed (anonymous) union or struct
        param = "{unnamed_" + param + "}"
        descs[param] = "anonymous\n"
        entry.anon_struct_union = True

    elif "__cacheline_group" in param:
        # Cache group enforcing variables (__cacheline_group_begin and
        # __cacheline_group_end) do not need to be described
        return

    # Warn if the parameter has no description, except for names
    # starting with '#', which are inline preprocessor statements
    if param not in descs and not param.startswith("#"):
        descs[param] = self.undescribed

        if "." not in param:
            kind = "parameter" if decl_type == 'function' else "member"
            self.emit_msg(ln,
                          f"{decl_type} {kind} '{param}' not described in '{declaration_name}'")

    # The param string is kept unchanged for output. Names like
    # "addr[6 + 2]" are handled by check_sections(), which ignores
    # "[blah" in a parameter string when comparing names.
    entry.parameterlist.append(param)
    entry.parametertypes[param] = KernRe(r'\s\s+').sub(' ', org_arg)
| |
def save_struct_actual(self, actual):
    """
    Strip all spaces from the actual param so that it looks like
    one string item, then append it, followed by a single space, to
    self.entry.struct_actual.
    """

    # All whitespace has to go, not just the leading one: struct_actual
    # is later split at whitespace, so a name like "addr[6 + 2]" would
    # otherwise become three separate items.
    actual = re.sub(r'\s+', '', actual)

    self.entry.struct_actual += actual + " "
| |
def create_parameter_list(self, ln, decl_type, args,
                          splitter, declaration_name):
    """
    Creates a list of parameters, storing them at self.entry.

    ``args`` is split at ``splitter``; each resulting declaration is
    classified as a preprocessor directive, a pointer-to-function, an
    array-of-pointers or a plain declaration (which may declare several
    names, pointers and bit-fields at once). Every name found is stored
    via save_struct_actual() and push_parameter().
    """

    # Temporarily replace all commas inside function pointer definitions
    # with '#', so that they survive the split below
    arg_expr = KernRe(r'(\([^\),]+),')
    while arg_expr.search(args):
        args = arg_expr.sub(r"\1#", args)

    for arg in args.split(splitter):
        # Strip comments
        arg = KernRe(r'\/\*.*\*\/').sub('', arg)

        # Ignore argument attributes
        arg = KernRe(r'\sPOS0?\s').sub(' ', arg)

        # Strip leading/trailing spaces
        arg = arg.strip()
        arg = KernRe(r'\s+').sub(' ', arg, count=1)

        if arg.startswith('#'):
            # Treat preprocessor directive as a typeless variable just
            # to fill corresponding data structures "correctly". Catch
            # it later in output_* subs.
            self.push_parameter(ln, decl_type, arg, "",
                                "", declaration_name)

        elif KernRe(r'\(.+\)\s*\(').search(arg):
            # Pointer-to-function

            # Restore the commas replaced above
            arg = arg.replace('#', ',')

            r = KernRe(r'[^\(]+\(\*?\s*([\w\[\]\.]*)\s*\)')
            if r.match(arg):
                param = r.group(1)
            else:
                self.emit_msg(ln, f"Invalid param: {arg}")
                param = arg

            dtype = KernRe(r'([^\(]+\(\*?)\s*' + re.escape(param)).sub(r'\1', arg)
            self.save_struct_actual(param)
            self.push_parameter(ln, decl_type, param, dtype,
                                arg, declaration_name)

        elif KernRe(r'\(.+\)\s*\[').search(arg):
            # Array-of-pointers

            # Restore the commas replaced above
            arg = arg.replace('#', ',')

            r = KernRe(r'[^\(]+\(\s*\*\s*([\w\[\]\.]*?)\s*(\s*\[\s*[\w]+\s*\]\s*)*\)')
            if r.match(arg):
                param = r.group(1)
            else:
                self.emit_msg(ln, f"Invalid param: {arg}")
                param = arg

            dtype = KernRe(r'([^\(]+\(\*?)\s*' + re.escape(param)).sub(r'\1', arg)

            self.save_struct_actual(param)
            self.push_parameter(ln, decl_type, param, dtype,
                                arg, declaration_name)

        elif arg:
            arg = KernRe(r'\s*:\s*').sub(":", arg)
            arg = KernRe(r'\s*\[').sub('[', arg)

            # One declaration may carry several names: "int a, *b, c:3"
            names = KernRe(r'\s*,\s*').split(arg)
            if names[0] and '*' in names[0]:
                names[0] = re.sub(r'(\*+)\s*', r' \1', names[0])

            # Split the first item into type words plus the first name.
            # Names with an array size, which may contain spaces (like
            # "addr[6 + 2]"), must be kept as a single word.
            r = KernRe(r'^(.*\s+)(.*?\[.*\].*)$')
            if names[0] and r.match(names[0]):
                names.pop(0)
                # Add the type *words*: extending the list with the
                # string itself would add one item per character
                first_arg = r.group(1).split()
                first_arg.append(r.group(2))
            else:
                first_arg = KernRe(r'\s+').split(names.pop(0))

            names.insert(0, first_arg.pop())
            dtype = ' '.join(first_arg)

            pointer = KernRe(r'^(\*+)\s*(.*)')
            bitfield = KernRe(r'(.*?):(\w+)')

            for param in names:
                if pointer.match(param):
                    # Pointer: the stars belong to the type
                    self.save_struct_actual(pointer.group(2))
                    self.push_parameter(ln, decl_type, pointer.group(2),
                                        f"{dtype} {pointer.group(1)}",
                                        arg, declaration_name)

                elif bitfield.search(param):
                    # Bit-field: the pattern has to match from the start
                    if not bitfield.match(param):
                        self.emit_msg(ln, f"Invalid param: {param}")
                        continue

                    if dtype != "":  # Skip unnamed bit-fields
                        self.save_struct_actual(bitfield.group(1))
                        self.push_parameter(ln, decl_type, bitfield.group(1),
                                            f"{dtype}:{bitfield.group(2)}",
                                            arg, declaration_name)
                else:
                    self.save_struct_actual(param)
                    self.push_parameter(ln, decl_type, param, dtype,
                                        arg, declaration_name)
| |
def check_sections(self, ln, decl_name, decl_type, sectcheck, prmscheck):
    """
    Warn about described names that are not part of the declaration.

    ``sectcheck`` and ``prmscheck`` are whitespace-separated lists with,
    respectively, the described names and the declared ones. A warning
    is emitted for each described name not found among the declared
    ones.
    """

    declared = []
    for prm in prmscheck.split():
        prm = KernRe(r'\[.*\]').sub('', prm)
        prm = attribute.sub('', prm)

        # Ignore the array size in a parameter string. The original
        # string may contain spaces, e.g. addr[6 + 2], which shows up
        # here as "addr[6", as the list is split at spaces. Hence,
        # just ignore "[..." for the sections check.
        prm = KernRe(r'\[.*').sub('', prm)

        declared.append(prm)

    kind = "parameter" if decl_type == 'function' else "member"

    for sect in sectcheck.split():
        if sect in declared:
            continue

        self.emit_msg(ln,
                      f"Excess {decl_type} {kind} '{sect}' description in '{decl_name}'")
| |
def check_return_section(self, ln, declaration_name, return_type):
    """
    Warn when a function returning a value has no "Return" section.

    Nothing is checked unless ``config.wreturn`` is set.
    """

    if not self.config.wreturn:
        return

    # An empty return type means that it's a macro
    if not return_type:
        return

    # Ignore functions with a "void" return type (but not "void *")
    if KernRe(r'void\s*\w*\s*$').search(return_type):
        return

    if self.entry.sections.get("Return", None):
        return

    self.emit_msg(ln,
                  f"No description found for return value of '{declaration_name}'")
| |
def dump_struct(self, ln, proto):
    """
    Store an entry for a struct or union.

    The prototype is processed in steps:

    1. extract the type, the name and the body of the definition;
    2. remove private members, comments and attributes and expand
       the known helper macros;
    3. flatten nested structs/unions, so that their members become
       ``name.member`` items of the outer declaration;
    4. create the parameter list and check it against the
       documented names;
    5. build an indented copy of the definition for display and
       store everything via output_declaration().
    """

    type_pattern = r'(struct|union)'

    qualifiers = [
        "__attribute__",
        "__packed",
        "__aligned",
        "____cacheline_aligned_in_smp",
        "____cacheline_aligned",
    ]

    definition_body = r'\{(.*)\}\s*' + "(?:" + '|'.join(qualifiers) + ")?"
    struct_members = KernRe(type_pattern + r'([^\{\};]+)(\{)([^\{\}]*)(\})([^\{\}\;]*)(\;)')

    # Extract struct/union definition
    members = None
    declaration_name = None
    decl_type = None

    r = KernRe(type_pattern + r'\s+(\w+)\s*' + definition_body)
    if r.search(proto):
        decl_type = r.group(1)
        declaration_name = r.group(2)
        members = r.group(3)
    else:
        # "typedef struct { ... } name;" has the name after the body
        r = KernRe(r'typedef\s+' + type_pattern + r'\s*' + definition_body + r'\s*(\w+)\s*;')

        if r.search(proto):
            decl_type = r.group(1)
            declaration_name = r.group(3)
            members = r.group(2)

    if not members:
        self.emit_msg(ln, f"{proto} error: Cannot parse struct or union!")
        return

    if self.entry.identifier != declaration_name:
        self.emit_msg(ln,
                      f"expecting prototype for {decl_type} {self.entry.identifier}. Prototype was for {decl_type} {declaration_name} instead\n")
        return

    args_pattern = r'([^,)]+)'

    # Regex flags are always passed by keyword, like the other KernRe
    # declarations of this file do, so that they can't be mistaken for
    # another positional argument of KernRe.
    sub_prefixes = [
        # Drop private members
        (KernRe(r'\/\*\s*private:.*?\/\*\s*public:.*?\*\/', flags=re.S | re.I), ''),
        (KernRe(r'\/\*\s*private:.*', flags=re.S | re.I), ''),

        # Strip comments
        (KernRe(r'\/\*.*?\*\/', flags=re.S), ''),

        # Strip attributes
        (attribute, ' '),
        (KernRe(r'\s*__aligned\s*\([^;]*\)', flags=re.S), ' '),
        (KernRe(r'\s*__counted_by\s*\([^;]*\)', flags=re.S), ' '),
        (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', flags=re.S), ' '),
        (KernRe(r'\s*__packed\s*', flags=re.S), ' '),
        (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', flags=re.S), ' '),
        (KernRe(r'\s*____cacheline_aligned_in_smp', flags=re.S), ' '),
        (KernRe(r'\s*____cacheline_aligned', flags=re.S), ' '),

        # Unwrap struct_group macros based on this definition:
        # __struct_group(TAG, NAME, ATTRS, MEMBERS...)
        # which has variants like: struct_group(NAME, MEMBERS...)
        # Only MEMBERS arguments require documentation.
        #
        # Parsing them happens on two steps:
        #
        # 1. drop struct group arguments that aren't at MEMBERS,
        #    storing them as STRUCT_GROUP(MEMBERS)
        #
        # 2. remove STRUCT_GROUP() ancillary macro.
        #
        # The original logic used to remove STRUCT_GROUP() using an
        # advanced regex:
        #
        #   \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*;
        #
        # which uses a recursive pattern, (?1), and an atomic grouping,
        # (?>...), both incompatible with the Python re module. A
        # simpler version:
        #
        #   \bSTRUCT_GROUP\(([^\)]+)\)[^;]*;
        #
        # doesn't properly match the end parenthesis on some cases.
        #
        # So, step 2 is handled separately, below, via the NestedMatch
        # class, which ensures that delimiters are properly matched.

        (KernRe(r'\bstruct_group\s*\(([^,]*,)', flags=re.S), r'STRUCT_GROUP('),
        (KernRe(r'\bstruct_group_attr\s*\(([^,]*,){2}', flags=re.S), r'STRUCT_GROUP('),
        (KernRe(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', flags=re.S), r'struct \1 \2; STRUCT_GROUP('),
        (KernRe(r'\b__struct_group\s*\(([^,]*,){3}', flags=re.S), r'STRUCT_GROUP('),

        # Replace macros
        #
        # TODO: use NestedMatch for FOO($1, $2, ...) matches
        #
        # it is better to also move those to the NestedMatch logic,
        # to ensure that parenthesis will be properly matched.

        (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', flags=re.S), r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'),
        (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', flags=re.S), r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'),
        (KernRe(r'DECLARE_BITMAP\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', flags=re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'),
        (KernRe(r'DECLARE_HASHTABLE\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', flags=re.S), r'unsigned long \1[1 << ((\2) - 1)]'),
        (KernRe(r'DECLARE_KFIFO\s*\(' + args_pattern + r',\s*' + args_pattern + r',\s*' + args_pattern + r'\)', flags=re.S), r'\2 *\1'),
        (KernRe(r'DECLARE_KFIFO_PTR\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', flags=re.S), r'\2 *\1'),
        (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', flags=re.S), r'\1 \2[]'),
        (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + args_pattern + r'\)', flags=re.S), r'dma_addr_t \1'),
        (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + args_pattern + r'\)', flags=re.S), r'__u32 \1'),
    ]

    # Regexes here are guaranteed to have the end limiter matching
    # the start delimiter. Yet, right now, only one replace group
    # is allowed.

    sub_nested_prefixes = [
        (re.compile(r'\bSTRUCT_GROUP\('), r'\1'),
    ]

    for search, sub in sub_prefixes:
        members = search.sub(sub, members)

    nested = NestedMatch()

    for search, sub in sub_nested_prefixes:
        members = nested.sub(search, sub, members)

    # Keeps the original declaration as-is
    declaration = members

    # Split nested struct/union elements
    #
    # The pattern only matches innermost structs/unions, as their body
    # can't contain braces. Each pass replaces the ones found by a flat
    # list of members, so the search has to be repeated until nothing
    # else matches in order to handle all nesting levels.

    while True:
        tuples = struct_members.findall(members)
        if not tuples:
            break

        for t in tuples:
            newmember = ""
            maintype = t[0]     # "struct" or "union"
            s_ids = t[5]        # identifiers declared after the '}'
            content = t[3]      # text between the braces

            # As all parts of the pattern are groups, joining them
            # rebuilds the matched text
            oldmember = "".join(t)

            for s_id in s_ids.split(','):
                s_id = s_id.strip()

                newmember += f"{maintype} {s_id}; "

                # Keep just the name: no bit-field/array size, no stars
                s_id = KernRe(r'[:\[].*').sub('', s_id)
                s_id = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', s_id)

                for arg in content.split(';'):
                    arg = arg.strip()

                    if not arg:
                        continue

                    r = KernRe(r'^([^\(]+\(\*?\s*)([\w\.]*)(\s*\).*)')
                    if r.match(arg):
                        # Pointer-to-function
                        dtype = r.group(1)
                        name = r.group(2)
                        extra = r.group(3)

                        if not name:
                            continue

                        if not s_id:
                            # Anonymous struct/union
                            newmember += f"{dtype}{name}{extra}; "
                        else:
                            newmember += f"{dtype}{s_id}.{name}{extra}; "

                    else:
                        arg = arg.strip()
                        # Handle bitmaps
                        arg = KernRe(r':\s*\d+\s*').sub('', arg)

                        # Handle arrays
                        arg = KernRe(r'\[.*\]').sub('', arg)

                        # Handle multiple IDs
                        arg = KernRe(r'\s*,\s*').sub(',', arg)

                        r = KernRe(r'(.*)\s+([\S+,]+)')

                        if r.search(arg):
                            dtype = r.group(1)
                            names = r.group(2)
                        else:
                            newmember += f"{arg}; "
                            continue

                        for name in names.split(','):
                            name = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', name).strip()

                            if not name:
                                continue

                            if not s_id:
                                # Anonymous struct/union
                                newmember += f"{dtype} {name}; "
                            else:
                                newmember += f"{dtype} {s_id}.{name}; "

            members = members.replace(oldmember, newmember)

    # Ignore other nested elements, like enums
    members = re.sub(r'(\{[^\{\}]*\})', '', members)

    self.create_parameter_list(ln, decl_type, members, ';',
                               declaration_name)
    self.check_sections(ln, declaration_name, decl_type,
                        self.entry.sectcheck, self.entry.struct_actual)

    # Adjust declaration for better display
    declaration = KernRe(r'([\{;])').sub(r'\1\n', declaration)
    declaration = KernRe(r'\}\s+;').sub('};', declaration)

    # Better handle inlined enums: place each enum item on its own line.
    # One comma is handled per match, so repeat until none is left.
    enum_item = KernRe(r'(enum\s+\{[^\}]+),([^\n])')
    while enum_item.search(declaration):
        declaration = enum_item.sub(r'\1,\n\2', declaration)

    def_args = declaration.split('\n')
    level = 1
    declaration = ""
    for clause in def_args:

        clause = clause.strip()
        clause = KernRe(r'\s+').sub(' ', clause, count=1)

        if not clause:
            continue

        # A closing brace goes back one indentation level
        if '}' in clause and level > 1:
            level -= 1

        # Preprocessor lines don't get the level-based indentation
        if not KernRe(r'^\s*#').match(clause):
            declaration += "\t" * level

        declaration += "\t" + clause + "\n"

        # An opening brace not closed at the same line starts a new level
        if "{" in clause and "}" not in clause:
            level += 1

    self.output_declaration(decl_type, declaration_name,
                            struct=declaration_name,
                            definition=declaration,
                            parameterlist=self.entry.parameterlist,
                            parameterdescs=self.entry.parameterdescs,
                            parametertypes=self.entry.parametertypes,
                            parameterdesc_start_lines=self.entry.parameterdesc_start_lines,
                            sectionlist=self.entry.sectionlist,
                            sections=self.entry.sections,
                            section_start_lines=self.entry.section_start_lines,
                            purpose=self.entry.declaration_purpose)
| |
def dump_enum(self, ln, proto):
    """
    Parse an enum prototype and store it at the self.entries array.

    Warnings are emitted for enum values without a description and for
    descriptions which don't correspond to any enum value.
    """

    cleanups = (
        # Ignore members marked private
        (r'\/\*\s*private:.*?\/\*\s*public:.*?\*\/', ''),
        (r'\/\*\s*private:.*}', '}'),

        # Strip comments
        (r'\/\*.*?\*\/', ''),

        # Strip #define macros inside enums
        (r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', ''),
    )

    for pattern, replacement in cleanups:
        proto = KernRe(pattern, flags=re.S).sub(replacement, proto)

    members = None
    declaration_name = None

    typedef_enum = KernRe(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;')
    plain_enum = KernRe(r'enum\s+(\w*)\s*\{(.*)\}')

    if typedef_enum.search(proto):
        declaration_name = typedef_enum.group(2)
        members = typedef_enum.group(1).rstrip()
    elif plain_enum.match(proto):
        declaration_name = plain_enum.group(1)
        members = plain_enum.group(2).rstrip()

    if not members:
        self.emit_msg(ln, f"{proto}: error: Cannot parse enum!")
        return

    entry = self.entry
    expected = entry.identifier

    if expected != declaration_name:
        if expected == "":
            self.emit_msg(ln,
                          f"{proto}: wrong kernel-doc identifier on prototype")
        else:
            self.emit_msg(ln,
                          f"expecting prototype for enum {expected}. Prototype was for enum {declaration_name} instead")
        return

    if not declaration_name:
        declaration_name = "(anonymous)"

    # Drop parenthesized expressions before splitting the values
    members = KernRe(r'\([^;]*?[\)]').sub('', members)

    first_word = KernRe(r'^\s*(\w+).*')
    declared = set()

    for value in members.split(','):
        if not value:
            continue

        value = first_word.sub(r'\1', value)
        entry.parameterlist.append(value)

        if value not in entry.parameterdescs:
            entry.parameterdescs[value] = self.undescribed
            self.emit_msg(ln,
                          f"Enum value '{value}' not described in enum '{declaration_name}'")

        declared.add(value)

    for k in entry.parameterdescs:
        if k in declared:
            continue

        self.emit_msg(ln,
                      f"Excess enum value '%{k}' description in '{declaration_name}'")

    self.output_declaration('enum', declaration_name,
                            enum=declaration_name,
                            parameterlist=entry.parameterlist,
                            parameterdescs=entry.parameterdescs,
                            parameterdesc_start_lines=entry.parameterdesc_start_lines,
                            sectionlist=entry.sectionlist,
                            sections=entry.sections,
                            section_start_lines=entry.section_start_lines,
                            purpose=entry.declaration_purpose)
| |
def dump_declaration(self, ln, prototype):
    """
    Store a data declaration, dispatching on the entry's declaration
    type: enums, typedefs and structs/unions have their own handlers,
    while anything else is stored as-is, with the entry attached.
    """

    decl_type = self.entry.decl_type

    if decl_type == "enum":
        handler = self.dump_enum
    elif decl_type == "typedef":
        handler = self.dump_typedef
    elif decl_type in ("union", "struct"):
        handler = self.dump_struct
    else:
        self.output_declaration(decl_type, prototype,
                                entry=self.entry)
        return

    handler(ln, prototype)
| |
def dump_function(self, ln, prototype):
    """
    Store a function or a function-like macro at the self.entries array.

    Known prefixes and annotations are removed from the prototype,
    which is then split into return type, name and parameters. The
    parameters are stored via create_parameter_list(); the documented
    names and the return section are then checked, and the result is
    stored via output_declaration().
    """

    func_macro = False
    return_type = ''
    decl_type = 'function'

    # Prefixes that would be removed: (pattern, replacement, re flags)
    sub_prefixes = [
        (r"^static +", "", 0),
        (r"^extern +", "", 0),
        (r"^asmlinkage +", "", 0),
        (r"^inline +", "", 0),
        (r"^__inline__ +", "", 0),
        (r"^__inline +", "", 0),
        (r"^__always_inline +", "", 0),
        (r"^noinline +", "", 0),
        (r"^__FORTIFY_INLINE +", "", 0),
        (r"__init +", "", 0),
        (r"__init_or_module +", "", 0),
        (r"__deprecated +", "", 0),
        (r"__flatten +", "", 0),
        (r"__meminit +", "", 0),
        (r"__must_check +", "", 0),
        (r"__weak +", "", 0),
        (r"__sched +", "", 0),
        (r"_noprof", "", 0),
        (r"__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +", "", 0),
        (r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +", "", 0),
        (r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +", "", 0),
        (r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)", r"\1, \2", 0),
        (r"__attribute_const__ +", "", 0),

        # __attribute__((...)) with one or more attributes, each made of
        # an attribute name, optional parenthesized arguments and an
        # optional comma at the end. It is written as a single-line
        # pattern, without relying on re.X.
        (r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+", "", 0),
    ]

    # Flags are passed by keyword, like the other KernRe declarations of
    # this file do, so that they can't be mistaken for another
    # positional argument of KernRe.
    for search, sub, flags in sub_prefixes:
        prototype = KernRe(search, flags=flags).sub(sub, prototype)

    # Macros are a special case, as they change the prototype format
    new_proto = KernRe(r"^#\s*define\s+").sub("", prototype)
    is_define_proto = new_proto != prototype
    prototype = new_proto

    # Yes, this truly is vile.  We are looking for:
    # 1. Return type (may be nothing if we're looking at a macro)
    # 2. Function name
    # 3. Function parameters.
    #
    # All the while we have to watch out for function pointer parameters
    # (which IIRC is what the two sections are for), C types (these
    # regexps don't even start to express all the possibilities), and
    # so on.
    #
    # If you mess with these regexps, it's a good idea to check that
    # the following functions' documentation still comes out right:
    # - parport_register_device (function pointer parameters)
    # - atomic_set (macro)
    # - pci_match_device, __copy_to_user (long return type)

    name = r'[a-zA-Z0-9_~:]+'
    prototype_end1 = r'[^\(]*'
    prototype_end2 = r'[^\{]*'
    prototype_end = fr'\(({prototype_end1}|{prototype_end2})\)'

    # Besides compiling, Perl qr{[\w\s]+} works as a non-capturing group.
    # So, this needs to be mapped in Python with (?:...)? or (?:...)+

    type1 = r'(?:[\w\s]+)?'
    type2 = r'(?:[\w\s]+\*+)+'

    found = False

    if is_define_proto:
        # Object-like macro: just a name followed by whitespace
        r = KernRe(r'^()(' + name + r')\s+')

        if r.search(prototype):
            return_type = ''
            declaration_name = r.group(2)
            func_macro = True

            found = True

    if not found:
        patterns = [
            rf'^()({name})\s*{prototype_end}',
            rf'^({type1})\s+({name})\s*{prototype_end}',
            rf'^({type2})\s*({name})\s*{prototype_end}',
        ]

        for p in patterns:
            r = KernRe(p)

            if r.match(prototype):

                return_type = r.group(1)
                declaration_name = r.group(2)
                args = r.group(3)

                self.create_parameter_list(ln, decl_type, args, ',',
                                           declaration_name)

                found = True
                break

    if not found:
        self.emit_msg(ln,
                      f"cannot understand function prototype: '{prototype}'")
        return

    if self.entry.identifier != declaration_name:
        self.emit_msg(ln,
                      f"expecting prototype for {self.entry.identifier}(). Prototype was for {declaration_name}() instead")
        return

    prms = " ".join(self.entry.parameterlist)
    self.check_sections(ln, declaration_name, "function",
                        self.entry.sectcheck, prms)

    self.check_return_section(ln, declaration_name, return_type)

    # The only difference between typedef'ed functions and regular
    # ones is the "typedef" flag
    self.output_declaration(decl_type, declaration_name,
                            function=declaration_name,
                            typedef='typedef' in return_type,
                            functiontype=return_type,
                            parameterlist=self.entry.parameterlist,
                            parameterdescs=self.entry.parameterdescs,
                            parametertypes=self.entry.parametertypes,
                            parameterdesc_start_lines=self.entry.parameterdesc_start_lines,
                            sectionlist=self.entry.sectionlist,
                            sections=self.entry.sections,
                            section_start_lines=self.entry.section_start_lines,
                            purpose=self.entry.declaration_purpose,
                            func_macro=func_macro)
| |
def dump_typedef(self, ln, proto):
    """
    Parse a typedef prototype and hand it to self.output_declaration().

    Function typedefs are emitted as 'function' declarations flagged with
    typedef=True, after their arguments are processed by
    self.create_parameter_list(). Any other typedef is emitted as a
    plain 'typedef' declaration.

    A message is sent to self.emit_msg() when the name found at the
    prototype differs from self.entry.identifier, or when the prototype
    can't be parsed at all.
    """

    typedef_type = r'((?:\s+[\w\*]+\b){0,7}\s+(?:\w+\b|\*+))\s*'
    typedef_ident = r'\*?\s*(\w\S+)\s*'
    typedef_args = r'\s*\((.*)\);'

    # typedef1: name inside parentheses, like "typedef int (*name)(args);"
    # typedef2: bare name, like "typedef int name(args);"
    typedef1 = KernRe(r'typedef' + typedef_type + r'\(' + typedef_ident + r'\)' + typedef_args)
    typedef2 = KernRe(r'typedef' + typedef_type + typedef_ident + typedef_args)

    # Strip comments
    proto = KernRe(r'/\*.*?\*/', flags=re.S).sub('', proto)

    # Parse function typedef prototypes
    for r in [typedef1, typedef2]:
        if not r.match(proto):
            continue

        return_type = r.group(1).strip()
        declaration_name = r.group(2)
        args = r.group(3)

        if self.entry.identifier != declaration_name:
            self.emit_msg(ln,
                          f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n")
            return

        decl_type = 'function'
        self.create_parameter_list(ln, decl_type, args, ',', declaration_name)

        self.output_declaration(decl_type, declaration_name,
                                function=declaration_name,
                                typedef=True,
                                functiontype=return_type,
                                parameterlist=self.entry.parameterlist,
                                parameterdescs=self.entry.parameterdescs,
                                parametertypes=self.entry.parametertypes,
                                parameterdesc_start_lines=self.entry.parameterdesc_start_lines,
                                sectionlist=self.entry.sectionlist,
                                sections=self.entry.sections,
                                section_start_lines=self.entry.section_start_lines,
                                purpose=self.entry.declaration_purpose)
        return

    # Handle nested parentheses or brackets: drop a trailing "(...)" or
    # "[...]" suffix (like the array size in "typedef int foo[5];").
    #
    # The final ';' has to be kept, as the simple typedef expression
    # below requires it. Each substitution shortens the string, so the
    # loop always terminates.
    r = KernRe(r'(\(.*\)\s*|\[.*\]\s*);$')
    while r.search(proto):
        proto = r.sub(';', proto)

    # Parse simple typedefs
    r = KernRe(r'typedef.*\s+(\w+)\s*;')
    if r.match(proto):
        declaration_name = r.group(1)

        if self.entry.identifier != declaration_name:
            self.emit_msg(ln,
                          f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n")
            return

        self.output_declaration('typedef', declaration_name,
                                typedef=declaration_name,
                                sectionlist=self.entry.sectionlist,
                                sections=self.entry.sections,
                                section_start_lines=self.entry.section_start_lines,
                                purpose=self.entry.declaration_purpose)
        return

    self.emit_msg(ln, "error: Cannot parse typedef!")
| |
@staticmethod
def process_export(function_set, line):
    """
    Look for an EXPORT_SYMBOL* tag on a line.

    When one of the export expressions matches, the exported symbol name
    (second capture group) is added to function_set.

    No class state is used here, which is why this is a staticmethod.
    """

    # Note: only one EXPORT_SYMBOL* per line is accepted, as having
    # multiple exports on a single line would violate Kernel coding style.
    # The plain expression is tried before the namespace one, and the
    # first one that matches wins.
    for matcher in (export_symbol, export_symbol_ns):
        if matcher.search(line):
            function_set.add(matcher.group(2))
            return
| |
def process_normal(self, ln, line):
    """
    STATE_NORMAL: wait for the "/**" marker that opens a comment block.

    Lines that don't match doc_start are ignored. When the marker is
    found, a fresh entry is started and the state moves to STATE_NAME.
    """

    if doc_start.match(line):
        # Start a new entry
        self.reset_state(ln)
        self.entry.in_doc_sect = False

        # The next line is expected to carry the documented name
        self.state = state.NAME
| |
def process_name(self, ln, line):
    """
    STATE_NAME: parse the "name - description" line of a comment block.

    Three outcomes are possible:

    - a "DOC:" line: the section name is stored and the state moves to
      STATE_DOCBLOCK;
    - a line with an identifier: identifier, declaration type and short
      description are stored at self.entry, and the state moves to
      STATE_BODY or STATE_BODY_MAYBE (or back to STATE_NORMAL when the
      line is rejected);
    - anything else: a "Cannot find identifier" message is emitted.
    """

    # "DOC: <title>" blocks. Untitled ones use the introduction section.
    if doc_block.search(line):
        self.entry.new_start_line = ln
        self.entry.section = doc_block.group(1) or self.section_intro
        self.entry.identifier = self.entry.section
        self.state = state.DOCBLOCK
        return

    if not doc_decl.search(line):
        # Failed to find an identifier. Emit a warning
        self.emit_msg(ln, f"Cannot find identifier on line:\n{line}")
        return

    self.entry.identifier = doc_decl.group(1)
    self.entry.is_kernel_comment = False

    # Building blocks for the expressions below
    decl_start = str(doc_com)       # comment block asterisk
    fn_type = r"(?:\w+\s*\*\s*)?"   # type (for non-functions)
    parenthesis = r"(?:\(\w*\))?"   # optional parenthesis on function
    decl_end = r"(?:[-:].*)"        # end of the name part

    # Test for pointer declaration type, foo * bar() - desc
    pointer_decl = KernRe(fr"^{decl_start}([\w\s]+?){parenthesis}?\s*{decl_end}?$")
    if pointer_decl.search(line):
        self.entry.identifier = pointer_decl.group(1)

    # Test for data declaration
    data_decl = KernRe(r"^\s*\*?\s*(struct|union|enum|typedef)\b\s*(\w*)")
    if data_decl.search(line):
        self.entry.decl_type = data_decl.group(1)
        self.entry.identifier = data_decl.group(2)
        self.entry.is_kernel_comment = True
    else:
        # Look for foo() or static void foo() - description;
        # or misspelt identifier. The first expression that matches
        # is the one used.
        function_patterns = (
            fr"^{decl_start}{fn_type}(\w+)\s*{parenthesis}\s*{decl_end}?$",
            fr"^{decl_start}{fn_type}(\w+[^-:]*){parenthesis}\s*{decl_end}$",
        )

        for pattern in function_patterns:
            func_decl = KernRe(pattern)
            if not func_decl.search(line):
                continue

            # Drop a "define " prefix from the captured name
            self.entry.identifier = KernRe(r"define\s+").sub("", func_decl.group(1))
            self.entry.decl_type = "function"
            self.entry.is_kernel_comment = True
            break

    self.entry.identifier = self.entry.identifier.strip(" ")

    self.state = state.BODY

    # If there's no @param blocks, the default section has to be set
    # up here
    self.entry.section = SECTION_DEFAULT
    self.entry.new_start_line = ln + 1

    purpose = KernRe("[-:](.*)")
    if purpose.search(line):
        # Strip leading/trailing spaces and collapse whitespace sequences
        self.entry.descr = KernRe(r"\s+").sub(" ", purpose.group(1).strip(" "))
        self.entry.declaration_purpose = self.entry.descr
        self.state = state.BODY_MAYBE
    else:
        self.entry.declaration_purpose = ""

    if not self.entry.is_kernel_comment:
        self.emit_msg(ln,
                      f"This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst\n{line}")
        self.state = state.NORMAL

    if self.config.wshort_desc and not self.entry.declaration_purpose:
        self.emit_msg(ln,
                      f"missing initial short description on line:\n{line}")

    # An empty identifier is only tolerated on enums
    if self.entry.decl_type != "enum" and not self.entry.identifier:
        self.emit_msg(ln,
                      f"wrong kernel-doc identifier on line:\n{line}")
        self.state = state.NORMAL

    if self.config.verbose:
        self.emit_msg(ln,
                      f"Scanning doc for {self.entry.decl_type} {self.entry.identifier}",
                      warning=False)
| |
def process_body(self, ln, line):
    """
    STATE_BODY, STATE_BODY_MAYBE and STATE_BODY_WITH_BLANK_LINE: the bulk
    of a kerneldoc comment.

    Depending on the line, this either starts a new section, finishes
    the comment (moving to STATE_PROTO), or appends the line text to the
    declaration purpose or to the contents of the current section.
    Lines that don't look like comment content are reported via
    self.emit_msg().
    """

    if self.state == state.BODY_WITH_BLANK_LINE:
        # Non-blank text after a blank line closes the previous section
        # and goes back to the default one
        r = KernRe(r"\s*\*\s?\S")
        if r.match(line):
            self.dump_section()
            self.entry.section = SECTION_DEFAULT
            self.entry.new_start_line = ln
            self.entry.contents = ""

    if doc_sect.search(line):
        self.entry.in_doc_sect = True
        newsection = doc_sect.group(1)

        if newsection.lower() in ["description", "context"]:
            newsection = newsection.title()

        # Special case: @return is a section, not a param description
        if newsection.lower() in ["@return", "@returns",
                                  "return", "returns"]:
            newsection = "Return"

        # Perl kernel-doc has a check here for contents before sections.
        # the logic there is always false, as in_doc_sect variable is
        # always true. So, just don't implement Wcontents_before_sections

        newcontents = doc_sect.group(2)
        if not newcontents:
            newcontents = ""

        # Flush what was collected for the previous section, if any
        if self.entry.contents.strip("\n"):
            self.dump_section()

        self.entry.new_start_line = ln
        self.entry.section = newsection
        self.entry.leading_space = None

        self.entry.contents = newcontents.lstrip()
        if self.entry.contents:
            self.entry.contents += "\n"

        self.state = state.BODY
        return

    if doc_end.search(line):
        self.dump_section()

        # Look for doc_com + <text> + doc_end:
        r = KernRe(r'\s*\*\s*[a-zA-Z_0-9:\.]+\*/')
        if r.match(line):
            self.emit_msg(ln, f"suspicious ending line: {line}")

        self.entry.prototype = ""
        self.entry.new_start_line = ln + 1

        self.state = state.PROTO
        return

    if doc_content.search(line):
        cont = doc_content.group(1)

        if cont == "":
            # A blank line ends a Context section right away
            if self.entry.section == self.section_context:
                self.dump_section()

                self.entry.new_start_line = ln
                self.state = state.BODY
            else:
                if self.entry.section != SECTION_DEFAULT:
                    self.state = state.BODY_WITH_BLANK_LINE
                else:
                    self.state = state.BODY

                self.entry.contents += "\n"

        elif self.state == state.BODY_MAYBE:

            # Continued declaration purpose
            self.entry.declaration_purpose = self.entry.declaration_purpose.rstrip()
            self.entry.declaration_purpose += " " + cont

            r = KernRe(r"\s+")
            self.entry.declaration_purpose = r.sub(' ',
                                                   self.entry.declaration_purpose)

        else:
            if self.entry.section.startswith('@') or        \
               self.entry.section == self.section_context:
                # The first content line of the section defines how much
                # indentation is expected
                if self.entry.leading_space is None:
                    r = KernRe(r'^(\s+)')
                    if r.match(cont):
                        self.entry.leading_space = len(r.group(1))
                    else:
                        self.entry.leading_space = 0

                # Double-check if leading spaces are really spaces,
                # counting at most leading_space of them. The slice
                # bounds the check to the line length, so a
                # whitespace-only line shorter than the expected
                # indentation can't raise IndexError.
                indent = cont[:self.entry.leading_space]
                pos = len(indent) - len(indent.lstrip(" "))

                cont = cont[pos:]

                # NEW LOGIC:
                # In case it is different, update it
                self.entry.leading_space = pos

            self.entry.contents += cont + "\n"
        return

    # Unknown line, ignore
    self.emit_msg(ln, f"bad line: {line}")
| |
def process_inline(self, ln, line):
    """
    STATE_INLINE: docbook comments within a prototype.

    Handles, in this order: the "@name:" line that opens an inline
    section, the "*/" line that closes the comment (going back to
    STATE_PROTO) and ordinary content lines.
    """

    # The section expression is only tried while a name is expected
    if self.inline_doc_state == state.INLINE_NAME and \
       doc_inline_sect.search(line):
        text = doc_inline_sect.group(2).lstrip()

        self.entry.section = doc_inline_sect.group(1)
        self.entry.new_start_line = ln
        self.entry.contents = text + "\n" if text != "" else text

        self.inline_doc_state = state.INLINE_TEXT
        return

    # Documentation block end */
    if doc_inline_end.search(line):
        if self.entry.contents not in ("", "\n"):
            self.dump_section()

        self.state = state.PROTO
        self.inline_doc_state = state.INLINE_NA
        return

    if not doc_content.search(line):
        return

    if self.inline_doc_state == state.INLINE_TEXT:
        collected = self.entry.contents + doc_content.group(1) + "\n"

        # Don't keep contents made only of spaces and newlines
        if not collected.strip(" ").rstrip("\n"):
            collected = ""

        self.entry.contents = collected

    elif self.inline_doc_state == state.INLINE_NAME:
        # Content arrived before any "@name:" line
        self.emit_msg(ln,
                      f"Incorrect use of kernel-doc format: {line}")

        self.inline_doc_state = state.INLINE_ERROR
| |
def syscall_munge(self, ln, proto):         # pylint: disable=W0613
    """
    Rewrite a SYSCALL_DEFINE*() prototype as a "long sys_*()" function.

    Returns the rewritten prototype. The ln argument is not used.
    """

    # Strip newlines/CR's
    proto = re.sub(r'[\r\n]+', ' ', proto)

    # SYSCALL_DEFINE0 means a syscall without arguments
    is_void = 'SYSCALL_DEFINE0' in proto

    # Replace SYSCALL_DEFINE with correct return type & function name
    proto = KernRe(r'SYSCALL_DEFINE.*\(').sub('long sys_', proto)

    if KernRe(r'long\s+(sys_.*?),').search(proto):
        # The comma after the syscall name opens the argument list
        proto = KernRe(',').sub('(', proto, count=1)
    elif is_void:
        proto = KernRe(r'\)').sub('(void)', proto, count=1)

    # Nothing else to do when there are no arguments
    if is_void:
        return proto

    # Now blank all of the odd-numbered commas in the proto
    # so that argument types & names don't have a comma between them
    chars = list(proto)
    commas = 0

    for idx, char in enumerate(chars):
        if char != ',':
            continue

        commas += 1
        if commas % 2 == 1:
            chars[idx] = ' '

    return "".join(chars)
| |
def tracepoint_munge(self, ln, proto):
    """
    Rewrite a tracepoint definition as a "trace_*()" function prototype.

    When both the tracepoint name and its TP_PROTO() arguments are
    found, self.entry.identifier gets a "trace_" prefix and the new
    prototype is returned. Otherwise, a message is emitted and proto is
    returned unchanged.
    """

    # Expression / capture group holding the tracepoint name. All of
    # them are tried, so a later match overrides an earlier one.
    name_patterns = (
        (r'TRACE_EVENT\((.*?),', 1),
        (r'DEFINE_SINGLE_EVENT\((.*?),', 1),
        (r'DEFINE_EVENT\((.*?),(.*?),', 2),
    )

    name = None
    for pattern, group in name_patterns:
        matcher = KernRe(pattern)
        if matcher.search(proto):
            name = matcher.group(group)

    if name:
        name = name.lstrip()

    args = None
    tp_proto = KernRe(r'TP_PROTO\((.*?)\)')
    if tp_proto.search(proto):
        args = tp_proto.group(1)

    if name and args:
        self.entry.identifier = f"trace_{self.entry.identifier}"
        return f"static inline void trace_{name}({args})"

    self.emit_msg(ln,
                  f"Unrecognized tracepoint format:\n{proto}\n")
    return proto
| |
def process_proto_function(self, ln, line):
    """
    Ancillary routine to process a function prototype.

    The line is accumulated at self.entry.prototype. Once a '{', a ';'
    or a "#define" is seen, the prototype is cleaned up, handed to
    self.dump_function() and the parser state is reset.
    """

    # Strip C99-style comments to end of line
    line = KernRe(r"\/\/.*$", re.S).sub('', line)

    is_define = KernRe(r'\s*#\s*define').match(line)

    if is_define:
        # A macro definition replaces whatever was collected so far
        self.entry.prototype = line
    elif not line.startswith('#'):
        # Other preprocessor lines (#ifdef/#ifndef/#endif/...) are
        # skipped; for regular code, keep what comes before any '{'
        head = KernRe(r'([^\{]*)')
        if head.match(line):
            self.entry.prototype += head.group(1) + " "

    # Keep collecting until the end of the prototype is reached
    if not ('{' in line or ';' in line or is_define):
        return

    cleanups = (
        (r'/\*.*?\*/', ''),     # strip comments
        (r'[\r\n]+', ' '),      # strip newlines/cr's
        (r'^\s+', ''),          # strip leading spaces
        #
        # Handle prototypes for function pointers like:
        #       int (*pcs_config)(struct foo)
        #
        (r'^(\S+\s+)\(\s*\*(\S+)\)', r'\1\2'),
    )

    for pattern, replacement in cleanups:
        self.entry.prototype = KernRe(pattern).sub(replacement,
                                                   self.entry.prototype)

    if 'SYSCALL_DEFINE' in self.entry.prototype:
        self.entry.prototype = self.syscall_munge(ln,
                                                  self.entry.prototype)

    tracepoint = KernRe(r'TRACE_EVENT|DEFINE_EVENT|DEFINE_SINGLE_EVENT')
    if tracepoint.search(self.entry.prototype):
        self.entry.prototype = self.tracepoint_munge(ln,
                                                     self.entry.prototype)

    self.dump_function(ln, self.entry.prototype)
    self.reset_state(ln)
| |
def process_proto_type(self, ln, line):
    """
    Ancillary routine to process a type.

    The line is split at each '{', '}' and ';', appending the pieces to
    self.entry.prototype while self.entry.brcount tracks the brace
    depth. A ';' found at depth zero ends the declaration, which is
    then handed to self.dump_declaration().
    """

    # The order matters: trailing spaces are stripped before comments
    cleanups = (
        (r'[\r\n]+', ' '),      # Strip newlines/cr's
        (r'^\s+', ''),          # Strip leading spaces
        (r'\s+$', ''),          # Strip trailing spaces
        (r"\/\/.*$", ''),       # Strip C99-style comments to end of line
    )

    for pattern, replacement in cleanups:
        line = KernRe(pattern, re.S).sub(replacement, line)

    # To distinguish preprocessor directive from regular declaration later.
    if line.startswith('#'):
        line += ";"

    splitter = KernRe(r'([^\{\};]*)([\{\};])(.*)')

    while splitter.search(line):
        text = splitter.group(1)
        delim = splitter.group(2)
        remaining = splitter.group(3)

        if self.entry.prototype:
            self.entry.prototype += " "
        self.entry.prototype += text + delim

        # Track the brace depth, never letting it go negative
        depth = self.entry.brcount + delim.count('{') - delim.count('}')
        self.entry.brcount = max(depth, 0)

        if delim == ';' and self.entry.brcount == 0:
            self.dump_declaration(ln, self.entry.prototype)
            self.reset_state(ln)
            break

        line = remaining
    else:
        # No delimiter left: keep the rest for the next line
        self.entry.prototype += line
| |
def process_proto(self, ln, line):
    """
    STATE_PROTO: reading a function/whatever prototype.

    Inline documentation comments are handled here; every other line
    is forwarded to the function or to the type prototype handler,
    depending on self.entry.decl_type.
    """

    # One-line inline comment: /** @name: description */
    if doc_inline_oneline.search(line):
        text = doc_inline_oneline.group(2)

        self.entry.section = doc_inline_oneline.group(1)
        self.entry.contents = text

        if text != "":
            self.entry.contents += "\n"
            self.dump_section(start_new=False)

        return

    # Start of a multi-line inline comment
    if doc_inline_start.search(line):
        self.state = state.INLINE
        self.inline_doc_state = state.INLINE_NAME
        return

    if self.entry.decl_type == 'function':
        self.process_proto_function(ln, line)
    else:
        self.process_proto_type(ln, line)
| |
def process_docblock(self, ln, line):
    """
    STATE_DOCBLOCK: within a DOC: block.

    Content lines are accumulated at self.entry.contents. The end of
    the comment dumps the section, outputs a "doc" declaration and
    resets the parser state.
    """

    if not doc_end.search(line):
        if doc_content.search(line):
            self.entry.contents += doc_content.group(1) + "\n"
        return

    # End of the comment: flush the block
    self.dump_section()
    self.output_declaration("doc", self.entry.identifier,
                            sectionlist=self.entry.sectionlist,
                            sections=self.entry.sections,
                            section_start_lines=self.entry.section_start_lines)
    self.reset_state(ln)
| |
def parse_export(self):
    """
    Collect the EXPORT_SYMBOL* macros of a single Kernel source file.

    Each line of self.fname is passed to self.process_export().

    Returns a set with what was collected, or None if an IOError
    happens while handling the file.
    """

    symbols = set()

    try:
        with open(self.fname, "r", encoding="utf8",
                  errors="backslashreplace") as source:
            for text in source:
                self.process_export(symbols, text)
    except IOError:
        return None
    else:
        return symbols
| |
def parse_kdoc(self):
    """
    Open a C source file and process it line by line.

    A state machine drives the parsing: each line is handed to the
    process function that matches the current state, and that function
    may change the state as needed.

    Exported symbols are collected on the same pass.

    Returns a tuple with the set of exported symbols and self.entries.
    If the file can't be opened, an error is logged and whatever was
    collected so far is returned.
    """

    joining = False         # inside a backslash-continued prototype
    pending = ""            # text gathered from the continued lines
    pending_ln = None       # line number where the continuation began
    export_table = set()

    try:
        with open(self.fname, "r", encoding="utf8",
                  errors="backslashreplace") as fp:
            for ln, line in enumerate(fp):

                line = line.expandtabs().strip("\n")

                # Group continuation lines on prototypes
                if self.state == state.PROTO:
                    if line.endswith("\\"):
                        pending += line.rstrip("\\")
                        joining = True

                        if not pending_ln:
                            pending_ln = ln

                        continue

                    if joining:
                        # Last piece: report it at the first line
                        ln = pending_ln
                        line = pending + line

                        pending = ""
                        pending_ln = None
                        joining = False

                self.config.log.debug("%d %s%s: %s",
                                      ln, state.name[self.state],
                                      state.inline_name[self.inline_doc_state],
                                      line)

                # This is an optimization over the original script.
                # There, when export_file was used for the same file,
                # it was read twice. Here, we use the already-existing
                # loop to parse exported symbols as well.
                #
                # TODO: It should be noticed that not all states are
                # needed here. On a future cleanup, process export only
                # at the states that aren't handling comment markups.
                self.process_export(export_table, line)

                # Hand this line to the appropriate state handler
                current = self.state

                if current == state.NORMAL:
                    self.process_normal(ln, line)
                elif current == state.NAME:
                    self.process_name(ln, line)
                elif current in [state.BODY, state.BODY_MAYBE,
                                 state.BODY_WITH_BLANK_LINE]:
                    self.process_body(ln, line)
                elif current == state.INLINE:   # scanning for inline parameters
                    self.process_inline(ln, line)
                elif current == state.PROTO:
                    self.process_proto(ln, line)
                elif current == state.DOCBLOCK:
                    self.process_docblock(ln, line)
    except OSError:
        self.config.log.error(f"Error: Cannot open file {self.fname}")

    return export_table, self.entries