diff --git a/contrib/utilities/update-copyright.sh b/contrib/utilities/update-copyright.sh index 8086458f888f..fa242a3b381a 100755 --- a/contrib/utilities/update-copyright.sh +++ b/contrib/utilities/update-copyright.sh @@ -14,6 +14,7 @@ ## ## --------------------------------------------------------------------- +set -u # Purpose: Update the copyright year of every file based on the last # modification recorded in the git logs @@ -24,51 +25,131 @@ if test ! -d source -o ! -d include -o ! -d examples ; then exit fi +processes=1 +accurate_first_year=false +until [[ "$@" == "" ]]; do + case $1 in + --pedantic) + accurate_first_year=true + shift;; + -j) + shift + if [[ "$@" == "" ]]; then + echo "Error: »-j« must be followed by a number" > /dev/stderr + echo "Usage: update-copyright.sh [--pedantic] [-j N]" > /dev/stderr + exit 1 + fi + processes="${1}" + shift;; + *) + echo "Error: invalid option »$1«" > /dev/stderr + echo "Usage: update-copyright.sh [--pedantic] [-j N]" > /dev/stderr + exit 1;; + esac +done + +# +# A shell function that updates the copyright string for a given file $1: +# + +update_copyright() +{ + file="${1}" + + if ! [ -f ${file} ]; then + echo "Skipping ${file}: not a file" + return + fi + + if ! head -13 ${file} | grep -q "^.. This file is part of the deal.II library.$" ; then + echo "Skipping ${file}: no deal.II copyright header" + return + fi + + # + # Get the last year this file was modified from the git log. We don't + # want to see patches that just updated the copyright year, thus find the + # first commit that + # - does not mention both the words "update" and "copyright", as well as + # - "Update license headers". 
+ # + + last_year=`git log -n 3 --date=short --format="format:%cd %s" ${file} | \ + egrep -i -v "update.*copyright|copyright.*update|Update license headers" | \ + head -n 1 | \ + perl -p -e 's/^(\d\d\d\d)-.*/\1/g;'` + + # + # It should not happen that the grep removes all 3 most recent commits + # simultaneously, but if it does then run the git log command again with + # full history: + # -files=" - $(echo contrib/*/*.{py,sh} \ - contrib/python-bindings/CMakeLists.txt \ - contrib/python-bindings/*/*.{h,cc,py} \ - contrib/utilities/{update-copyright,indent} \ - doc/doxygen/*/*.{h,h.in} \ - doc/doxygen/scripts/*.pl \ - doc/screen.css) - $(find include/ source/ examples/ | egrep '\.(h|in|cc|cu)$') - $(find cmake/ | egrep '\.(cmake|in|cc)$') - $(find . -name CMakeLists.txt) - $(find tests/ | egrep '\.(h|cc)$') - $(find doc/ | egrep '\.html$') -" - - -for i in $files ; do - # get the last year this file was modified from the git log. - # we don't want to see patches that just updated the copyright - # year, so output the dates and log messages of the last 3 - # commits, throw away all that mention both the words - # "update" and "copyright", and take the year of the first - # message that remains + [ -z "$last_year" ] && last_year=`git log --date=short --format="format:%cd %s" ${file} | \ + egrep -i -v "update.*copyright|copyright.*update|Update license headers" | \ + head -n 1 | \ + perl -p -e 's/^(\d\d\d\d)-.*/\1/g;'` + + if ! $accurate_first_year; then + # + # Get the first year this file was modified from the actual file. This is + # fast but might be inaccurate. + # + first_year=`egrep 'Copyright \(C\) [0-9]{4}' ${file} | \ + perl -p -e "s/.*Copyright \(C\) (\d{4}).*/\1/g;"` + else + # + # Get the first (plausible) year this file was modified. While each file + # (ideally) already contains a start year, experience suggests that this + # information is typically wildly incorrect because files (and copyright + # headers) get copied all the time. 
We thus grab this information from + # git history (which is a rather expensive operation - but so is running + # this script in the first place). + # + # * In order to make the git log lookup as fast as possible we use + # `--diff-filter=A` to skip all but the first commit in which the file + # was created. + # + # We try to find simple renames with the `--follow` toggle. Note that + # - we enforce a 90% similarity when trying to find a potential rename + # candidate with `-M90%` + # - we specifically do not use `--find-copies` here because we want to + # track the beginning of every individual file (and not the start date + # of the original one). + # - `--reverse` cannot be used in combination with `--follow`. So we + # simply `tail` the output. + # + first_year=`git log --follow -M90% --diff-filter=A --date=short --format="format:%cd %s" ${file} | \ + tail -n 1 | \ + perl -p -e 's/^(\d\d\d\d)-.*/\1/g;'` + fi + + # + # Print a status message and update copyright line: # - # (it should be enough to look at the last 2 messages since - # ideally no two successive commits should have updated the - # copyright year. let's err on the safe side and take the last - # 3 commits.) - last_year=`git log -n 3 --date=short --format="format:%cd %s" $i | \ - egrep -i -v "update.*copyright|copyright.*update" | \ - head -n 1 | \ - perl -p -e 's/^(\d\d\d\d)-.*/\1/g;'` - - # get the first year this file was modified from the actual - # file. this may predate the git log if the file was copied - # from elsewhere - first_year=`cat $i | egrep 'Copyright \(C\) [0-9]{4}' | \ - perl -p -e "s/.*Copyright \(C\) (\d{4}).*/\1/g;"` - - # print a status message. we really only have to update - # the copyright year if the first and last year are - # different - echo "Processing $i: ${first_year} - ${last_year}" - if test ! 
"${first_year}" = "${last_year}" ; then - perl -pi -e "s/(Copyright \(C\) \d{4})( - \d{4})?(, \d{4}( - \d{4})?)*/\1 - ${last_year}/g;" $i + + if [ "${first_year}" = "${last_year}" ]; then + echo "Processing ${file}: ${last_year}" + perl -pi -e "s/(^.. Copyright \(C\)) \d{4}( - \d{4})?(, \d{4}( - \d{4})?)*/\1 ${last_year}/g if 1..13;" ${file} + else + echo "Processing ${file}: ${first_year} - ${last_year}" + perl -pi -e "s/(^.. Copyright \(C\)) \d{4}( - \d{4})?(, \d{4}( - \d{4})?)*/\1 ${first_year} - ${last_year}/g if 1..13;" ${file} fi -done +} + +# +# Run copyright update in parallel: +# + +process() +{ + i=0 + find ${1} -type f -regextype egrep -regex "${2}" | while read file; do + (( i=i%processes )); (( i++==0 )) && wait + update_copyright "${file}" & + done +} + +process "." "CMakeLists.txt|CTestConfig.cmake" update_copyright +process "cmake contrib doc examples include source tests" ".*" update_copyright +