checkkconfigsymbols.py: improve detection of defects

This patch improves the detection of defects by updating the
regular expression to find Kconfig identifiers in the source
code, and fixes some cases of false positives. The following
changes are made:
- improve regex to find Kconfig identifiers in the source
- exclude .log files from analysis
- improve filtering of false positives (e.g, CONFIG_XXX)
- change output format from (feature:\tlist) to (feature\tlist)

Signed-off-by: Valentin Rothberg <valentinrothberg@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
diff --git a/scripts/checkkconfigsymbols.py b/scripts/checkkconfigsymbols.py
index f944089..e9cc689 100644
--- a/scripts/checkkconfigsymbols.py
+++ b/scripts/checkkconfigsymbols.py
@@ -1,36 +1,31 @@
 #!/usr/bin/env python
 
-"""Find Kconfig identifieres that are referenced but not defined."""
+"""Find Kconfig identifiers that are referenced but not defined."""
 
-# Copyright (C) 2014 Valentin Rothberg <valentinrothberg@gmail.com>
-# Copyright (C) 2014 Stefan Hengelein <stefan.hengelein@fau.de>
+# (c) 2014 Valentin Rothberg <valentinrothberg@gmail.com>
+# (c) 2014 Stefan Hengelein <stefan.hengelein@fau.de>
 #
-# This program is free software; you can redistribute it and/or modify it
-# under the terms and conditions of the GNU General Public License,
-# version 2, as published by the Free Software Foundation.
-#
-# This program is distributed in the hope it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
-# more details.
+# Licensed under the terms of the GNU GPL License version 2
 
 
 import os
 import re
 from subprocess import Popen, PIPE, STDOUT
 
-# REGEX EXPRESSIONS
+
+# regex expressions
 OPERATORS = r"&|\(|\)|\||\!"
-FEATURE = r"\w*[A-Z]{1}\w*"
-CONFIG_DEF = r"^\s*(?:menu){,1}config\s+(" + FEATURE + r")\s*"
+FEATURE = r"(?:\w*[A-Z0-9]\w*){2,}"
+DEF = r"^\s*(?:menu){,1}config\s+(" + FEATURE + r")\s*"
 EXPR = r"(?:" + OPERATORS + r"|\s|" + FEATURE + r")+"
 STMT = r"^\s*(?:if|select|depends\s+on)\s+" + EXPR
+SOURCE_FEATURE = r"(?:\W|\b)+[D]{,1}CONFIG_(" + FEATURE + r")"
 
-# REGEX OBJECTS
+# regex objects
 REGEX_FILE_KCONFIG = re.compile(r".*Kconfig[\.\w+\-]*$")
 REGEX_FEATURE = re.compile(r"(" + FEATURE + r")")
-REGEX_SOURCE_FEATURE = re.compile(r"(?:D|\W|\b)+CONFIG_(" + FEATURE + r")")
-REGEX_KCONFIG_DEF = re.compile(CONFIG_DEF)
+REGEX_SOURCE_FEATURE = re.compile(SOURCE_FEATURE)
+REGEX_KCONFIG_DEF = re.compile(DEF)
 REGEX_KCONFIG_EXPR = re.compile(EXPR)
 REGEX_KCONFIG_STMT = re.compile(STMT)
 REGEX_KCONFIG_HELP = re.compile(r"^\s+(help|---help---)\s*$")
@@ -42,7 +37,7 @@
     source_files = []
     kconfig_files = []
     defined_features = set()
-    referenced_features = dict()
+    referenced_features = dict()  # {feature: [files]}
 
     # use 'git ls-files' to get the worklist
     pop = Popen("git ls-files", stdout=PIPE, stderr=STDOUT, shell=True)
@@ -52,12 +47,12 @@
 
     for gitfile in stdout.rsplit("\n"):
         if ".git" in gitfile or "ChangeLog" in gitfile or \
-                os.path.isdir(gitfile):
+                ".log" in gitfile or os.path.isdir(gitfile):
             continue
         if REGEX_FILE_KCONFIG.match(gitfile):
             kconfig_files.append(gitfile)
         else:
-            # All non-Kconfig files are checked for consistency
+            # all non-Kconfig files are checked for consistency
             source_files.append(gitfile)
 
     for sfile in source_files:
@@ -68,15 +63,17 @@
 
     print "Undefined symbol used\tFile list"
     for feature in sorted(referenced_features):
+        # filter some false positives
+        if feature == "FOO" or feature == "BAR" or \
+                feature == "FOO_BAR" or feature == "XXX":
+            continue
         if feature not in defined_features:
             if feature.endswith("_MODULE"):
-                # Avoid false positives for kernel modules
+                # avoid false positives for kernel modules
                 if feature[:-len("_MODULE")] in defined_features:
                     continue
-            if "FOO" in feature or "BAR" in feature:
-                continue
             files = referenced_features.get(feature)
-            print "%s:\t%s" % (feature, ", ".join(files))
+            print "%s\t%s" % (feature, ", ".join(files))
 
 
 def parse_source_file(sfile, referenced_features):
@@ -92,9 +89,9 @@
         for feature in features:
             if not REGEX_FILTER_FEATURES.search(feature):
                 continue
-            paths = referenced_features.get(feature, set())
-            paths.add(sfile)
-            referenced_features[feature] = paths
+            sfiles = referenced_features.get(feature, set())
+            sfiles.add(sfile)
+            referenced_features[feature] = sfiles
 
 
 def get_features_in_line(line):
@@ -113,7 +110,7 @@
     for i in range(len(lines)):
         line = lines[i]
         line = line.strip('\n')
-        line = line.split("#")[0]  # Ignore Kconfig comments
+        line = line.split("#")[0]  # ignore comments
 
         if REGEX_KCONFIG_DEF.match(line):
             feature_def = REGEX_KCONFIG_DEF.findall(line)
@@ -122,11 +119,11 @@
         elif REGEX_KCONFIG_HELP.match(line):
             skip = True
         elif skip:
-            # Ignore content of help messages
+            # ignore content of help messages
             pass
         elif REGEX_KCONFIG_STMT.match(line):
             features = get_features_in_line(line)
-            # Multi-line statements
+            # multi-line statements
             while line.endswith("\\"):
                 i += 1
                 line = lines[i]