Skip to content

Commit

Permalink
CPP expression parser for makedep
Browse files Browse the repository at this point in the history
This patch adds a relatively robust parser for C preprocessor
expressions inside of an #if statement.

The following are supported:

* Nearly all operators, including arithmetic, logical, and bitwise,

* Parentheses within expressions,

* defined() evaluations.

The following are explicitly not supported:

* Function macros,

* Multiline preprocessor directives.

No doubt there are other lingering issues, but this is comprehensive
enough to handle both MOM6 as well as current and legacy FMS source
codes.

Existing Makefile.dep output files appear to be mostly unchanged.  One
rule (data_override.o) had its arguments reordered but is otherwise
unchanged.  mpp_data.o had its rule corrected to use mpp_util_mpi.inc
rather than mpp_util_nocomm.inc.

Some fixes and adjustments were made to the overall makedep source:

* Input macros (-D) are now stored as key-value dicts, rather than
  simply a list of macro names.

* Input macros are now passed to all scan_fortran_file() calls, rather
  than just the Fortran source.

* Input macros are now correctly passed to FMS makedep.  Previously,
  these were omitted from the Makefile generation.

* Previously, #if blocks were always set to True, even though the
  comments indicated that they were always set to False.  Given that
  neither of these was ever correct, it's amazing that we were able to
  survive this long without prior incident.

The motivation for this PR comes from issues with Makefile generation in
FMS.  Older versions of FMS were unable to correctly resolve their
dependencies in fft.f90 on certain systems (perhaps caused
by filesystem peculiarities).  Newer versions of FMS were unable to
handle changing the #if block default from True to False.  Inevitably,
we threw up our hands and solved the underlying problem.
  • Loading branch information
marshallward committed Apr 3, 2024
1 parent f9372f3 commit 6acc2ff
Show file tree
Hide file tree
Showing 2 changed files with 195 additions and 12 deletions.
2 changes: 1 addition & 1 deletion ac/deps/Makefile.fms.in
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,4 @@ ARFLAGS = @ARFLAGS@
.PHONY: depend
depend: Makefile.dep
Makefile.dep:
$(PYTHON) $(MAKEDEP) -o Makefile.dep -e -x libFMS.a -s @srcdir@/test_fms @srcdir@
$(PYTHON) $(MAKEDEP) $(DEFS) -o Makefile.dep -e -x libFMS.a -s @srcdir@/test_fms @srcdir@
205 changes: 194 additions & 11 deletions ac/makedep
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@ import re
import sys


# Pre-compile re searches
# Fortran tokenization

re_module = re.compile(r"^ *module +([a-z_0-9]+)")
re_use = re.compile(r"^ *use +([a-z_0-9]+)")
re_cpp_define = re.compile(r"^ *# *define +[_a-zA-Z][_a-zA-Z0-9]")
Expand All @@ -32,6 +33,80 @@ re_procedure = re.compile(
)


# Preprocessor expression tokenization

def _cpp_token(scanner, token):
    """Scanner action which keeps the matched text as the token."""
    return token


def _cpp_div(x, y):
    """Integer division which truncates toward zero, as in C."""
    quotient = abs(x) // abs(y)
    return quotient if (x < 0) == (y < 0) else -quotient


def _cpp_mod(x, y):
    """Remainder matching truncating division (sign follows the dividend)."""
    return x - y * _cpp_div(x, y)


# Patterns are tried in the listed order and the first match wins, so every
# multi-character operator must precede its one-character prefix ('!=' before
# '!', '>>' and '>=' before '>', '&&' before '&', ...).
cpp_scanner = re.Scanner([
    # The word boundary keeps identifiers such as "defined_FOO" in one piece.
    (r'defined\b', _cpp_token),
    (r'[_A-Za-z][_0-9a-zA-Z]*', _cpp_token),
    (r'[0-9]+', _cpp_token),
    (r'\(', _cpp_token),
    (r'\)', _cpp_token),
    (r'\*', _cpp_token),
    (r'/', _cpp_token),
    (r'%', _cpp_token),
    (r'\+', _cpp_token),
    (r'-', _cpp_token),
    (r'!=', _cpp_token),
    (r'!', _cpp_token),
    (r'>>', _cpp_token),
    (r'>=', _cpp_token),
    (r'>', _cpp_token),
    (r'<<', _cpp_token),
    (r'<=', _cpp_token),
    (r'<', _cpp_token),
    (r'==', _cpp_token),
    (r'&&', _cpp_token),
    (r'&', _cpp_token),
    (r'\^', _cpp_token),
    (r'\|\|', _cpp_token),
    (r'\|', _cpp_token),
    # Discard the leading directive itself; whitespace is tolerated before
    # the '#' and between '#' and 'if'.
    (r'^\s*\#\s*if\b', None),
    (r'\s+', None),
])


# Operator token -> function applying it.  '!' takes one operand; every other
# entry takes (left, right).
cpp_operate = {
    '!': lambda x: not x,
    '*': lambda x, y: x * y,
    '/': _cpp_div,
    '%': _cpp_mod,
    '+': lambda x, y: x + y,
    '-': lambda x, y: x - y,
    '>>': lambda x, y: x >> y,
    '<<': lambda x, y: x << y,
    '==': lambda x, y: x == y,
    '!=': lambda x, y: x != y,
    '>': lambda x, y: x > y,
    '>=': lambda x, y: x >= y,
    '<': lambda x, y: x < y,
    '<=': lambda x, y: x <= y,
    '&': lambda x, y: x & y,
    '^': lambda x, y: x ^ y,
    '|': lambda x, y: x | y,
    # Logical operators yield a truth value rather than one of the operands.
    '&&': lambda x, y: bool(x) and bool(y),
    '||': lambda x, y: bool(x) or bool(y),
}


# Operator token -> precedence; a larger rank binds more tightly.
cpp_op_rank = {
    '(': 13,
    '!': 12,
    '*': 11,
    '/': 11,
    '%': 11,
    '+': 10,
    '-': 10,
    '>>': 9,
    '<<': 9,
    '>': 8,
    '>=': 8,
    '<': 8,
    '<=': 8,
    '==': 7,
    '!=': 7,
    '&': 6,
    '^': 5,
    '|': 4,
    # '&&' must outrank '||' so that "A || B && C" groups as "A || (B && C)".
    '&&': 3,
    '||': 2,
    ')': 1,
    # Sentinel entries for stack-based evaluation: '$' marks the end of the
    # expression and None stands for "no pending operator".
    '$': 1,
    None: 0,
}


def create_deps(src_dirs, skip_dirs, makefile, debug, exec_target, fc_rule,
link_externals, defines):
"""Create "makefile" after scanning "src_dis"."""
Expand Down Expand Up @@ -105,7 +180,7 @@ def create_deps(src_dirs, skip_dirs, makefile, debug, exec_target, fc_rule,
all_modules += mods

for f in c_files:
_, _, cpp, inc, _, _ = scan_fortran_file(f)
_, _, cpp, inc, _, _ = scan_fortran_file(f, defines)
# maps object file to .h files included
o2h[object_file(f)] = cpp
externals.append(object_file(f))
Expand Down Expand Up @@ -158,7 +233,7 @@ def create_deps(src_dirs, skip_dirs, makefile, debug, exec_target, fc_rule,
]
missing_mods = [m for m in o2uses[o] if m not in all_modules]

incs, inc_used = nested_inc(o2h[o] + o2inc[o], f2F)
incs, inc_used = nested_inc(o2h[o] + o2inc[o], f2F, defines)
inc_mods = [u for u in inc_used if u not in found_mods and u in all_modules]

incdeps = sorted(set([f2F[f] for f in incs if f in f2F]))
Expand Down Expand Up @@ -250,7 +325,7 @@ def link_obj(obj, o2uses, mod2o, all_modules):
return sorted(set(olst))


def nested_inc(inc_files, f2F):
def nested_inc(inc_files, f2F, defines):
"""List of all files included by "inc_files", either by #include or F90
include."""
hlst = []
Expand All @@ -260,7 +335,7 @@ def nested_inc(inc_files, f2F):
if hfile not in f2F.keys():
return

_, used, cpp, inc, _, _ = scan_fortran_file(f2F[hfile])
_, used, cpp, inc, _, _ = scan_fortran_file(f2F[hfile], defines)

# Record any module updates inside of include files
used_mods.update(used)
Expand All @@ -286,7 +361,8 @@ def scan_fortran_file(src_file, defines=None):

cpp_defines = defines if defines is not None else []

cpp_macros = [define.split('=')[0] for define in cpp_defines]
#cpp_macros = [define.split('=')[0] for define in cpp_defines]
cpp_macros = dict([t.split('=') for t in cpp_defines])
cpp_group_stack = []

with io.open(src_file, 'r', errors='replace') as file:
Expand Down Expand Up @@ -328,9 +404,9 @@ def scan_fortran_file(src_file, defines=None):
if match:
cpp_group_stack.append(cpp_exclude)

# XXX: Don't attempt to parse #if statements, but store the state.
# if/endif stack. For now, assume that these always fail.
cpp_exclude = False
cpp_expr_value = cpp_expr_eval(line, cpp_macros)

cpp_exclude = not cpp_expr_value

# Complement #else condition group
match = re_cpp_else.match(line)
Expand All @@ -351,8 +427,14 @@ def scan_fortran_file(src_file, defines=None):
# Activate a new macro (ignoring the value)
match = re_cpp_define.match(line)
if match:
new_macro = line.lstrip()[1:].split()[1]
cpp_macros.append(new_macro)
tokens = line.strip()[1:].split(maxsplit=2)
macro = tokens[1]
value = tokens[2] if tokens[2:] else None
if '(' in macro:
# TODO: Actual handling of function macros
macro, arg = macro.split('(', maxsplit=1)
value = '(' + arg + value
cpp_macros[macro] = value

# Deactivate a macro
match = re_cpp_undef.match(line)
Expand Down Expand Up @@ -441,6 +523,107 @@ def add_suff(lst, suff):
return [f + suff for f in lst]


def cpp_expr_eval(expr, macros=None):
    """Evaluate the expression of a C preprocessor ``#if`` line.

    The text is tokenized with ``cpp_scanner`` and evaluated by precedence
    climbing.  Binary operator precedence is read from ``cpp_op_rank`` and
    the operations themselves from ``cpp_operate``; binary operators of equal
    rank associate left to right.  Prefix ``!``, ``-`` and ``+`` apply to the
    operand which follows them.

    Both sides of every binary operator are evaluated (there is no
    short-circuiting), so an error in the right operand of ``&&`` or ``||``
    is raised even when the left operand already decides the result.

    Args:
        expr: Text to evaluate, as accepted by ``cpp_scanner``.
        macros: Mapping of macro name to value.  Any key counts as defined
            for ``defined``, even when its value is None.  Defaults to no
            macros.

    Returns:
        The value of the expression.  Identifiers which are not macros
        evaluate to 0; a macro whose value is a string of decimal digits
        evaluates to that integer; any other non-None macro value is used
        as is.

    Raises:
        ValueError: If ``expr`` cannot be tokenized or is not a well-formed
            expression.
    """
    if macros is None:
        macros = {}

    tokens, remainder = cpp_scanner.scan(expr)

    # Abort if any characters are not tokenized
    if remainder:
        raise ValueError(
            'Cannot tokenize preprocessor expression {!r}: tokens {!r}, '
            'unscanned text {!r}'.format(expr, tokens, remainder)
        )

    # Reversed, so that the next token is always the cheap-to-pop last item.
    pending = tokens[::-1]

    try:
        value = _cpp_eval_binary(pending, macros, 0)
        if pending:
            # Leftovers mean a stray ')' or two operands without an operator.
            raise ValueError('unexpected token {!r}'.format(pending[-1]))
    except ValueError as err:
        raise ValueError(
            'Cannot evaluate preprocessor expression {!r}: {}'.format(expr, err)
        ) from err

    return value


def _cpp_eval_binary(pending, macros, min_rank):
    """Evaluate operands joined by binary operators of rank >= min_rank."""
    value = _cpp_eval_operand(pending, macros)

    while pending:
        op = pending[-1]

        # '!' is prefix-only, and tokens without an operation (such as ')')
        # end this sub-expression; both are left for the caller to handle.
        if op == '!' or op not in cpp_operate:
            break

        rank = cpp_op_rank[op]
        if rank < min_rank:
            break

        pending.pop()
        # Requiring a strictly higher rank on the right-hand side makes
        # operators of equal rank associate left to right.
        rhs = _cpp_eval_binary(pending, macros, rank + 1)
        value = cpp_operate[op](value, rhs)

    return value


def _cpp_eval_operand(pending, macros):
    """Evaluate one operand, including any prefix operators and parentheses."""
    if not pending:
        raise ValueError('expression ends where an operand was expected')

    tok = pending.pop()

    # Prefix operators bind more tightly than any binary operator.
    if tok == '!':
        return cpp_operate['!'](_cpp_eval_operand(pending, macros))
    if tok == '-':
        return -_cpp_eval_operand(pending, macros)
    if tok == '+':
        return +_cpp_eval_operand(pending, macros)

    if tok == '(':
        value = _cpp_eval_binary(pending, macros, 0)
        _cpp_expect(pending, ')')
        return value

    # Evaluate "defined X" and "defined(X)"
    if tok == 'defined':
        parens = bool(pending) and pending[-1] == '('
        if parens:
            pending.pop()

        if not pending or not pending[-1].isidentifier():
            raise ValueError("'defined' must be followed by a macro name")
        name = pending.pop()

        if parens:
            _cpp_expect(pending, ')')

        # NOTE: Any key in `macros` is considered to be set, even if the
        # value is None.
        return name in macros

    if tok.isdigit():
        return int(tok)

    if tok.isidentifier():
        # "Identifiers that are not macros, which are all considered to be
        # the number zero." (CPP manual, 4.2.2)
        value = macros.get(tok, '0')
        if value is None:
            # Macro defined without a value: it expands to nothing, which is
            # treated as zero here instead of failing on the missing string.
            return 0
        if isinstance(value, str) and value.isdigit():
            return int(value)
        return value

    raise ValueError('unexpected token {!r}'.format(tok))


def _cpp_expect(pending, expected):
    """Consume the next token, which must equal ``expected``."""
    if not pending or pending[-1] != expected:
        found = pending[-1] if pending else 'end of expression'
        raise ValueError(
            'expected {!r} but found {!r}'.format(expected, found)
        )
    pending.pop()


# Parse arguments
parser = argparse.ArgumentParser(
description="Generate make dependencies for F90 source code."
Expand Down

0 comments on commit 6acc2ff

Please sign in to comment.