From 8c87a5723cc6d9db43bb44a0bb13cb312debd684 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Lebrun?= Date: Thu, 7 Nov 2024 17:47:17 +0100 Subject: [PATCH 01/26] Dockerfile: COPY Elixir sources after `pip install` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit COPY sources in two steps: (1) copy requirements.txt then do `pip install` stuff then (2) copy all remaining sources. This means the iterating time to rebuild the Docker image when editing sources is much shorter: from 22.3s to 7.3s on my machine. Signed-off-by: Théo Lebrun --- docker/Dockerfile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 3be3c60e..2e5180f0 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -32,7 +32,7 @@ RUN \ libyaml-0-2 \ wget -COPY . /usr/local/elixir/ +COPY ./requirements.txt /usr/local/elixir/requirements.txt WORKDIR /usr/local/elixir/ @@ -43,6 +43,8 @@ RUN python3 -m venv venv && \ pip install /tmp/build/berkeleydb-*.whl && \ pip install -r requirements.txt +COPY . /usr/local/elixir/ + RUN mkdir -p /srv/elixir-data/ COPY ./docker/000-default.conf /etc/apache2/sites-available/000-default.conf From a07470f1de5d119cc8cb687d7e8f9951cbcb273e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Lebrun?= Date: Thu, 7 Nov 2024 18:11:44 +0100 Subject: [PATCH 02/26] utils/index-repository: fetch in parallel MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previous sequence: - git clone ... # first fetch - git remote add remote0 ... - git fetch remote0 # second fetch - git remote add remote1 ... - git fetch remote1 # third fetch Now: - git init - git remote add remote0 ... - git remote add remote1 ... - git remote add remote2 ... 
- git fetch --all -j4 # all fetches at the same time Signed-off-by: Théo Lebrun --- utils/index-repository | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/utils/index-repository b/utils/index-repository index 82314068..cb8775cf 100755 --- a/utils/index-repository +++ b/utils/index-repository @@ -10,20 +10,21 @@ export ELIXIR_INSTALL=$(dirname $(dirname $(readlink -f "$0"))) dir=/srv/elixir-data/$1 -mkdir -p $dir/data -git clone --bare $2 $dir/repo +mkdir -p $dir/data $dir/repo + +git -C $dir/repo init --bare git config --system --add safe.directory $dir/repo -shift shift i=0 for remote do - git -C $dir/repo remote add other$i $remote - git -C $dir/repo fetch --tags other$i + git -C $dir/repo remote add remote$i $remote i=$(($i+1)) done +git -C $dir/repo fetch --all --tags -j4 + export LXR_REPO_DIR=$dir/repo export LXR_DATA_DIR=$dir/data python3 /usr/local/elixir/update.py $ELIXIR_THREADS From e64d3254abd4d72655bc83f5d3e77abebb40c88a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Lebrun?= Date: Thu, 7 Nov 2024 18:15:17 +0100 Subject: [PATCH 03/26] utils/update-elixir-data: fetch in parallel MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is pretty useful as update-elixir-data gets called often to check for new updates. Most often, there are none, so checking all remotes at the same time is useful. This only applies to the kernel, that is the only project using multiple (three) remotes. 
Signed-off-by: Théo Lebrun --- utils/update-elixir-data | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/update-elixir-data b/utils/update-elixir-data index de948ec4..32206802 100755 --- a/utils/update-elixir-data +++ b/utils/update-elixir-data @@ -31,7 +31,7 @@ for dir_name in $LXR_PROJ_DIR/*; do export LXR_REPO_DIR=$dir_name/repo cd $LXR_REPO_DIR - git fetch --all --tags + git fetch --all --tags -j4 cd $ELIXIR_INSTALL ./update.py $ELIXIR_THREADS From a6199613b0d4fd1bd4d36739380a5f880d72928a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Lebrun?= Date: Fri, 8 Nov 2024 10:14:20 +0100 Subject: [PATCH 04/26] utils/index-repository: add alias for `git -C ...` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Simplify the script. We never `cd` into the directory, we instead use `git -C`. Avoid repeating it by creating a $git variable. Signed-off-by: Théo Lebrun --- utils/index-repository | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/utils/index-repository b/utils/index-repository index cb8775cf..7f014f82 100755 --- a/utils/index-repository +++ b/utils/index-repository @@ -12,18 +12,20 @@ dir=/srv/elixir-data/$1 mkdir -p $dir/data $dir/repo -git -C $dir/repo init --bare +git="git -C $dir/repo" + +$git init --bare git config --system --add safe.directory $dir/repo shift i=0 for remote do - git -C $dir/repo remote add remote$i $remote + $git remote add remote$i $remote i=$(($i+1)) done -git -C $dir/repo fetch --all --tags -j4 +$git fetch --all --tags -j4 export LXR_REPO_DIR=$dir/repo export LXR_DATA_DIR=$dir/data From afdf910c435f61e9d7408859febbb1ca987d4855 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Lebrun?= Date: Fri, 8 Nov 2024 10:13:11 +0100 Subject: [PATCH 05/26] utils/index-repository: support calling on existing repository MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make utils/index-repository idempotent, meaning 
we can call it multiple times on the same repo and same remotes without issues. Also allow adding new remotes to an existing repo. Signed-off-by: Théo Lebrun --- utils/index-repository | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/utils/index-repository b/utils/index-repository index 7f014f82..1840cf88 100755 --- a/utils/index-repository +++ b/utils/index-repository @@ -14,13 +14,26 @@ mkdir -p $dir/data $dir/repo git="git -C $dir/repo" +# This doesn't fail if repo already exists $git init --bare + git config --system --add safe.directory $dir/repo +existing_remotes="$($git remote | xargs -L1 -r $git remote get-url | sort -u)" + shift -i=0 +i="$($git remote | awk ' + BEGIN { n=-1; } + $0 ~ /^remote[0-9]+$/ { i=substr($0, length("remote")+1); + if (i>n) n=i; } + END { print n+1; }')" for remote do + # Don't `git remote add` remotes that already exist, which is not an error. + if echo "$existing_remotes" | grep -qF "$remote"; then + continue; + fi + $git remote add remote$i $remote i=$(($i+1)) done From a931e93b2f1184405ec4f33f2121c5009a27054b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Lebrun?= Date: Fri, 8 Nov 2024 10:41:15 +0100 Subject: [PATCH 06/26] utils/*: delete common.sh and inline $ELIXIR_THREADS fallback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit $ELIXIR_THREADS fallback to nproc is straight forward code, much more than the incantation to find the path to the Elixir install path. 
Remove the incantation and replace by simple code: if test -z "$ELIXIR_THREADS"; then ELIXIR_THREADS="$(nproc)" fi Signed-off-by: Théo Lebrun --- utils/common.sh | 24 ------------------------ utils/index-all-repositories | 7 ++++--- utils/index-repository | 7 ++++--- utils/update-elixir-data | 5 +++-- 4 files changed, 11 insertions(+), 32 deletions(-) delete mode 100755 utils/common.sh diff --git a/utils/common.sh b/utils/common.sh deleted file mode 100755 index 3bc403c0..00000000 --- a/utils/common.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/bin/bash -# This file is part of Elixir, a source code cross-referencer. -# -# Copyright (C) 2019--2023 Michael Opdenacker and contributors -# -# Elixir is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# Elixir is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with Elixir. If not, see . - -if [ "$ELIXIR_THREADS" = "" ] -then - # Set number of threads to the number of CPU cores - # available to the current process - export ELIXIR_THREADS=`nproc` -fi diff --git a/utils/index-all-repositories b/utils/index-all-repositories index eaf2fafa..8636286a 100755 --- a/utils/index-all-repositories +++ b/utils/index-all-repositories @@ -21,6 +21,10 @@ # You should have received a copy of the GNU Affero General Public License # along with Elixir. If not, see . +if test -z "$ELIXIR_THREADS"; then + ELIXIR_THREADS="$(nproc)" +fi + index() { project=$1 master=$2 @@ -62,9 +66,6 @@ index() { ./update.py $ELIXIR_THREADS } -export ELIXIR_INSTALL=$(dirname $(dirname $(readlink -f "$0"))) -. 
$ELIXIR_INSTALL/utils/common.sh - if [ "$ELIXIR_ROOT" = "" ] then echo "Error: ELIXIR_ROOT environment variable not set" diff --git a/utils/index-repository b/utils/index-repository index 1840cf88..b6bf47f6 100755 --- a/utils/index-repository +++ b/utils/index-repository @@ -5,9 +5,6 @@ if [ "$#" -lt 2 ]; then exit 1 fi -export ELIXIR_INSTALL=$(dirname $(dirname $(readlink -f "$0"))) -. $ELIXIR_INSTALL/utils/common.sh - dir=/srv/elixir-data/$1 mkdir -p $dir/data $dir/repo @@ -40,6 +37,10 @@ done $git fetch --all --tags -j4 +if test -z "$ELIXIR_THREADS"; then + ELIXIR_THREADS="$(nproc)" +fi + export LXR_REPO_DIR=$dir/repo export LXR_DATA_DIR=$dir/data python3 /usr/local/elixir/update.py $ELIXIR_THREADS diff --git a/utils/update-elixir-data b/utils/update-elixir-data index 32206802..ad0fcbab 100755 --- a/utils/update-elixir-data +++ b/utils/update-elixir-data @@ -17,8 +17,9 @@ # You should have received a copy of the GNU Affero General Public License # along with Elixir. If not, see . -export ELIXIR_INSTALL=$(dirname $(dirname $(readlink -f "$0"))) -. 
$ELIXIR_INSTALL/utils/common.sh +if test -z "$ELIXIR_THREADS"; then + ELIXIR_THREADS="$(nproc)" +fi if [ -z "$LXR_PROJ_DIR" ]; then echo "ERROR: LXR_PROJ_DIR environment variable not set" From a343612e79ccb6b7970d123c0a3debc6aa96dbcc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Lebrun?= Date: Fri, 8 Nov 2024 10:45:29 +0100 Subject: [PATCH 07/26] utils/index-repository: refactor by creating project_init() function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Théo Lebrun --- utils/index-repository | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/utils/index-repository b/utils/index-repository index b6bf47f6..4a1eb067 100755 --- a/utils/index-repository +++ b/utils/index-repository @@ -5,16 +5,22 @@ if [ "$#" -lt 2 ]; then exit 1 fi -dir=/srv/elixir-data/$1 +# $1 is the project path (inside will be created data/ and repo/). +# It supports being called on an existing project. +project_init() { + mkdir -p $1/data $1/repo -mkdir -p $dir/data $dir/repo + # This doesn't fail if repo already exists + git -C $1/repo init --bare -git="git -C $dir/repo" + git config --system --add safe.directory $1/repo +} -# This doesn't fail if repo already exists -$git init --bare +dir=/srv/elixir-data/$1 -git config --system --add safe.directory $dir/repo +project_init "$dir" + +git="git -C $dir/repo" existing_remotes="$($git remote | xargs -L1 -r $git remote get-url | sort -u)" From 2fa895c61599520b9b1c70f75fb0a4877f95c0aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Lebrun?= Date: Fri, 8 Nov 2024 10:46:39 +0100 Subject: [PATCH 08/26] utils/index-repository: refactor by creating project_add_remote() function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Théo Lebrun --- utils/index-repository | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/utils/index-repository 
b/utils/index-repository index 4a1eb067..e0fe18e5 100755 --- a/utils/index-repository +++ b/utils/index-repository @@ -16,29 +16,36 @@ project_init() { git config --system --add safe.directory $1/repo } +# $1 is the project path (parent of data/ and repo/). +# $2 is the remote URL. +project_add_remote() { + git="git -C $1/repo" + + # Do nothing if remote already exists. + if $git remote | xargs -L1 -r $git remote get-url | grep -qxF "$2"; then + return; + fi + + # Remotes are called remote$i with $i = 0, 1, 2... + i="$($git remote | awk ' + BEGIN { n=-1; } + $0 ~ /^remote[0-9]+$/ { i=substr($0, length("remote")+1); + if (i>n) n=i; } + END { print n+1; }')" + + $git remote add remote$i "$2" +} + dir=/srv/elixir-data/$1 project_init "$dir" git="git -C $dir/repo" -existing_remotes="$($git remote | xargs -L1 -r $git remote get-url | sort -u)" - shift -i="$($git remote | awk ' - BEGIN { n=-1; } - $0 ~ /^remote[0-9]+$/ { i=substr($0, length("remote")+1); - if (i>n) n=i; } - END { print n+1; }')" for remote do - # Don't `git remote add` remotes that already exist, which is not an error. 
- if echo "$existing_remotes" | grep -qF "$remote"; then - continue; - fi - - $git remote add remote$i $remote - i=$(($i+1)) + project_add_remote "$dir" "$remote" done $git fetch --all --tags -j4 From d8df2209552509c09885f8e1714b50c977514c43 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Lebrun?= Date: Fri, 8 Nov 2024 10:47:40 +0100 Subject: [PATCH 09/26] utils/index-repository: refactor by creating project_fetch() function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Théo Lebrun --- utils/index-repository | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/utils/index-repository b/utils/index-repository index e0fe18e5..c7ef768f 100755 --- a/utils/index-repository +++ b/utils/index-repository @@ -36,19 +36,22 @@ project_add_remote() { $git remote add remote$i "$2" } +# $1 is the project path (parent of data/ and repo/). +project_fetch() { + git -C $1/repo fetch --all --tags -j4 +} + dir=/srv/elixir-data/$1 project_init "$dir" -git="git -C $dir/repo" - shift for remote do project_add_remote "$dir" "$remote" done -$git fetch --all --tags -j4 +project_fetch "$dir" if test -z "$ELIXIR_THREADS"; then ELIXIR_THREADS="$(nproc)" From 219640ed7fdea1e87cd1a128ff6c5d3578554f2f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Lebrun?= Date: Fri, 8 Nov 2024 10:48:39 +0100 Subject: [PATCH 10/26] utils/index-repository: refactor by creating project_index() function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Théo Lebrun --- utils/index-repository | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/utils/index-repository b/utils/index-repository index c7ef768f..e025117e 100755 --- a/utils/index-repository +++ b/utils/index-repository @@ -41,6 +41,16 @@ project_fetch() { git -C $1/repo fetch --all --tags -j4 } +# $1 is the project path (parent of data/ and repo/). 
+project_index() { + if test -z "$ELIXIR_THREADS"; then + ELIXIR_THREADS="$(nproc)" + fi + + LXR_REPO_DIR=$1/repo LXR_DATA_DIR=$1/data \ + python3 /usr/local/elixir/update.py $ELIXIR_THREADS +} + dir=/srv/elixir-data/$1 project_init "$dir" @@ -53,10 +63,4 @@ done project_fetch "$dir" -if test -z "$ELIXIR_THREADS"; then - ELIXIR_THREADS="$(nproc)" -fi - -export LXR_REPO_DIR=$dir/repo -export LXR_DATA_DIR=$dir/data -python3 /usr/local/elixir/update.py $ELIXIR_THREADS +project_index "$dir" From a2febaadaf87dca8c400f96c27455206d44bec8a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Lebrun?= Date: Fri, 8 Nov 2024 13:41:21 +0100 Subject: [PATCH 11/26] utils: rename index-repository to index MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Théo Lebrun --- utils/{index-repository => index} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename utils/{index-repository => index} (100%) diff --git a/utils/index-repository b/utils/index similarity index 100% rename from utils/index-repository rename to utils/index From 6297a121963e45739a8ddb3665745daef5b99b43 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Lebrun?= Date: Fri, 8 Nov 2024 10:50:28 +0100 Subject: [PATCH 12/26] utils: deduplicate index-all-repositories into index MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Théo Lebrun --- utils/index | 78 +++++++++++++++++++++++++---- utils/index-all-repositories | 97 ------------------------------------ 2 files changed, 67 insertions(+), 108 deletions(-) delete mode 100755 utils/index-all-repositories diff --git a/utils/index b/utils/index index e025117e..9c060cc9 100755 --- a/utils/index +++ b/utils/index @@ -1,13 +1,20 @@ #!/bin/sh -if [ "$#" -lt 2 ]; then - echo "Usage: $0 repo_name repo_urls..." +if test $# -lt 3 -a \( $# -ne 2 -o "$2" != "--all" \); then + echo "Usage: $0 ..." 
+ echo "Usage: $0 --all" exit 1 fi # $1 is the project path (inside will be created data/ and repo/). # It supports being called on an existing project. project_init() { + # Detect already inited projects. Avoids stderr logs. + # Using `git tag -n1` because `git status` doesn't work on bare repos. + if git -C $1/repo tag -n1 >/dev/null 2>/dev/null; then + return; + fi + mkdir -p $1/data $1/repo # This doesn't fail if repo already exists @@ -51,16 +58,65 @@ project_index() { python3 /usr/local/elixir/update.py $ELIXIR_THREADS } -dir=/srv/elixir-data/$1 +# $1 is the Elixir root data path. +# $2 is the project name. +# $... are the remote URLs. +index() { + dir="$1/$2" + + # Remember if we are doing an indexing from scratch. + if test ! "$(find $1/data -type f)"; then + from_scratch=1 + fi + + project_init "$dir" -project_init "$dir" + shift + shift + for remote + do + project_add_remote "$dir" "$remote" + done -shift -for remote -do - project_add_remote "$dir" "$remote" -done + project_fetch "$dir" + project_index "$dir" -project_fetch "$dir" + # Redo a fetch+indexing as the initial one probably took a lot of time. 
+ if test "$from_scratch"; then + project_fetch "$dir" + project_index "$dir" + fi +} + +# Index a single project +if test $# -ge 3; then + index "$@" + exit +fi -project_index "$dir" +# Index all known projects +index $1 amazon-freertos https://github.com/aws/amazon-freertos.git +index $1 arm-trusted-firmware https://github.com/ARM-software/arm-trusted-firmware +index $1 barebox https://git.pengutronix.de/git/barebox +index $1 busybox https://git.busybox.net/busybox +index $1 coreboot https://review.coreboot.org/coreboot.git +index $1 dpdk https://dpdk.org/git/dpdk \ + https://dpdk.org/git/dpdk-stable +index $1 glibc https://sourceware.org/git/glibc.git +index $1 llvm https://github.com/llvm/llvm-project.git +index $1 mesa https://gitlab.freedesktop.org/mesa/mesa.git +index $1 musl https://git.musl-libc.org/git/musl +index $1 ofono https://git.kernel.org/pub/scm/network/ofono/ofono.git +index $1 op-tee https://github.com/OP-TEE/optee_os.git +index $1 qemu https://gitlab.com/qemu-project/qemu.git +index $1 u-boot https://source.denx.de/u-boot/u-boot.git +index $1 uclibc-ng https://cgit.uclibc-ng.org/cgi/cgit/uclibc-ng.git +index $1 zephyr https://github.com/zephyrproject-rtos/zephyr +index $1 toybox https://github.com/landley/toybox.git +index $1 grub https://git.savannah.gnu.org/git/grub.git +index $1 bluez https://git.kernel.org/pub/scm/bluetooth/bluez.git +index $1 linux https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git \ + https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git \ + https://github.com/bootlin/linux-history.git +index $1 xen https://xenbits.xen.org/git-http/xen.git +index $1 freebsd https://git.freebsd.org/src.git diff --git a/utils/index-all-repositories b/utils/index-all-repositories deleted file mode 100755 index 8636286a..00000000 --- a/utils/index-all-repositories +++ /dev/null @@ -1,97 +0,0 @@ -#!/bin/bash -# Runs indexing from scratch on all supported projects -# Needed when full re-indexing is needed: -# - to fix 
an indexing bug -# - after database changes - -# This file is part of Elixir, a source code cross-referencer. -# -# Copyright (C) 2019--2020 Michael Opdenacker and contributors -# -# Elixir is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# Elixir is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with Elixir. If not, see . - -if test -z "$ELIXIR_THREADS"; then - ELIXIR_THREADS="$(nproc)" -fi - -index() { - project=$1 - master=$2 - remote=$3 - remote2=$4 - echo "$project: first indexing pass..." - - export LXR_DATA_DIR=$ELIXIR_ROOT/$project/data - export LXR_REPO_DIR=$ELIXIR_ROOT/$project/repo - - mkdir -p $LXR_DATA_DIR - git clone --bare $master $LXR_REPO_DIR - - if [ "$remote" != "" ] - then - cd $LXR_REPO_DIR - git remote add other $remote - git fetch --tags other - fi - - if [ "$remote2" != "" ] - then - cd $LXR_REPO_DIR - git remote add other2 $remote2 - git fetch --tags other2 - fi - - cd $ELIXIR_INSTALL - ./update.py $ELIXIR_THREADS - - # The above can take so much time on the first run that it's worth running a new update - - echo "$project: updating the repository after the first indexing pass..." - cd $LXR_REPO_DIR - git fetch --all --tags - - echo "$project: 2nd indexing pass..." 
- cd $ELIXIR_INSTALL - ./update.py $ELIXIR_THREADS -} - -if [ "$ELIXIR_ROOT" = "" ] -then - echo "Error: ELIXIR_ROOT environment variable not set" - echo "It's where Elixir data are stored" - exit 1 -fi - -index amazon-freertos https://github.com/aws/amazon-freertos.git -index arm-trusted-firmware https://github.com/ARM-software/arm-trusted-firmware -index barebox https://git.pengutronix.de/git/barebox -index busybox https://git.busybox.net/busybox -index coreboot https://review.coreboot.org/coreboot.git -index dpdk https://dpdk.org/git/dpdk https://dpdk.org/git/dpdk-stable -index glibc https://sourceware.org/git/glibc.git -index llvm https://github.com/llvm/llvm-project.git -index mesa https://gitlab.freedesktop.org/mesa/mesa.git -index musl https://git.musl-libc.org/git/musl -index ofono https://git.kernel.org/pub/scm/network/ofono/ofono.git -index op-tee https://github.com/OP-TEE/optee_os.git -index qemu https://gitlab.com/qemu-project/qemu.git -index u-boot https://source.denx.de/u-boot/u-boot.git -index uclibc-ng https://cgit.uclibc-ng.org/cgi/cgit/uclibc-ng.git -index zephyr https://github.com/zephyrproject-rtos/zephyr -index toybox https://github.com/landley/toybox.git -index grub https://git.savannah.gnu.org/git/grub.git -index bluez https://git.kernel.org/pub/scm/bluetooth/bluez.git -index linux https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git https://github.com/bootlin/linux-history.git -index xen https://xenbits.xen.org/git-http/xen.git -index freebsd https://git.freebsd.org/src.git From b2a46945486f02e6bb2d69cf0126b3239deb151c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Lebrun?= Date: Fri, 8 Nov 2024 11:41:30 +0100 Subject: [PATCH 13/26] utils/index: make it possible to update a specific project MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allow calling like: ./utils/index musl That will do the same thing as before 
(fetch+index). It works only if a previous call was made to add remotes. Signed-off-by: Théo Lebrun --- utils/index | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/utils/index b/utils/index index 9c060cc9..6b815ebc 100755 --- a/utils/index +++ b/utils/index @@ -1,7 +1,7 @@ #!/bin/sh -if test $# -lt 3 -a \( $# -ne 2 -o "$2" != "--all" \); then - echo "Usage: $0 ..." +if test $# -lt 2; then + echo "Usage: $0 [...]" echo "Usage: $0 --all" exit 1 fi @@ -89,7 +89,7 @@ index() { } # Index a single project -if test $# -ge 3; then +if test "$2" != "--all"; then index "$@" exit fi From ce78f4896426e62df4f18be15cc42def5398abe1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Lebrun?= Date: Fri, 8 Nov 2024 12:29:21 +0100 Subject: [PATCH 14/26] utils: deduplicate utils/update-elixir-data into utils/index MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously: LXR_PROJ_DIR=/srv/elixir-data ./utils/update-elixir-data Now: ./utils/index /srv/elixir-data --all The impact is slightly different: it also has the side-effect of creating all known projects (Linux, U-Boot, etc.) if they didn't exist. We have asked around and we are not aware of any other Elixir instance. To keep the previous behavior, if people don't want to index all supported projects: x=/srv/elixir-data find $x -mindepth 1 -maxdepth 1 -printf "%f\n" | \ xargs -L1 -r ./utils/index $x Signed-off-by: Théo Lebrun --- utils/index | 91 ++++++++++++++++++++++------------------ utils/update-elixir-data | 39 ----------------- 2 files changed, 50 insertions(+), 80 deletions(-) delete mode 100755 utils/update-elixir-data diff --git a/utils/index b/utils/index index 6b815ebc..44437885 100755 --- a/utils/index +++ b/utils/index @@ -29,7 +29,7 @@ project_add_remote() { git="git -C $1/repo" # Do nothing if remote already exists.
- if $git remote | xargs -L1 -r $git remote get-url | grep -qxF "$2"; then + if $git remote | xargs -L1 -r $git remote get-url 2>/dev/null | grep -qxF "$2"; then return; fi @@ -61,14 +61,9 @@ project_index() { # $1 is the Elixir root data path. # $2 is the project name. # $... are the remote URLs. -index() { +add_remotes() { dir="$1/$2" - # Remember if we are doing an indexing from scratch. - if test ! "$(find $1/data -type f)"; then - from_scratch=1 - fi - project_init "$dir" shift @@ -77,46 +72,60 @@ index() { do project_add_remote "$dir" "$remote" done +} - project_fetch "$dir" - project_index "$dir" - - # Redo a fetch+indexing as the initial one probably took a lot of time. - if test "$from_scratch"; then - project_fetch "$dir" - project_index "$dir" +do_index() { + if test ! "$(find $1/data -type f)"; then + # If we are indexing from scratch, do it twice as the initial one + # probably took a lot of time. + project_fetch "$1" + project_index "$1" + project_fetch "$1" + project_index "$1" + else + project_fetch "$1" + project_index "$1" fi } # Index a single project if test "$2" != "--all"; then - index "$@" + dir="$1/$2" + add_remotes "$@" + do_index "$dir" exit fi -# Index all known projects -index $1 amazon-freertos https://github.com/aws/amazon-freertos.git -index $1 arm-trusted-firmware https://github.com/ARM-software/arm-trusted-firmware -index $1 barebox https://git.pengutronix.de/git/barebox -index $1 busybox https://git.busybox.net/busybox -index $1 coreboot https://review.coreboot.org/coreboot.git -index $1 dpdk https://dpdk.org/git/dpdk \ - https://dpdk.org/git/dpdk-stable -index $1 glibc https://sourceware.org/git/glibc.git -index $1 llvm https://github.com/llvm/llvm-project.git -index $1 mesa https://gitlab.freedesktop.org/mesa/mesa.git -index $1 musl https://git.musl-libc.org/git/musl -index $1 ofono https://git.kernel.org/pub/scm/network/ofono/ofono.git -index $1 op-tee https://github.com/OP-TEE/optee_os.git -index $1 qemu 
https://gitlab.com/qemu-project/qemu.git -index $1 u-boot https://source.denx.de/u-boot/u-boot.git -index $1 uclibc-ng https://cgit.uclibc-ng.org/cgi/cgit/uclibc-ng.git -index $1 zephyr https://github.com/zephyrproject-rtos/zephyr -index $1 toybox https://github.com/landley/toybox.git -index $1 grub https://git.savannah.gnu.org/git/grub.git -index $1 bluez https://git.kernel.org/pub/scm/bluetooth/bluez.git -index $1 linux https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git \ - https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git \ - https://github.com/bootlin/linux-history.git -index $1 xen https://xenbits.xen.org/git-http/xen.git -index $1 freebsd https://git.freebsd.org/src.git +# Add all known projects remotes +add_remotes $1 amazon-freertos https://github.com/aws/amazon-freertos.git +add_remotes $1 arm-trusted-firmware https://github.com/ARM-software/arm-trusted-firmware +add_remotes $1 barebox https://git.pengutronix.de/git/barebox +add_remotes $1 busybox https://git.busybox.net/busybox +add_remotes $1 coreboot https://review.coreboot.org/coreboot.git +add_remotes $1 dpdk https://dpdk.org/git/dpdk \ + https://dpdk.org/git/dpdk-stable +add_remotes $1 glibc https://sourceware.org/git/glibc.git +add_remotes $1 llvm https://github.com/llvm/llvm-project.git +add_remotes $1 mesa https://gitlab.freedesktop.org/mesa/mesa.git +add_remotes $1 musl https://git.musl-libc.org/git/musl +add_remotes $1 ofono https://git.kernel.org/pub/scm/network/ofono/ofono.git +add_remotes $1 op-tee https://github.com/OP-TEE/optee_os.git +add_remotes $1 qemu https://gitlab.com/qemu-project/qemu.git +add_remotes $1 u-boot https://source.denx.de/u-boot/u-boot.git +add_remotes $1 uclibc-ng https://cgit.uclibc-ng.org/cgi/cgit/uclibc-ng.git +add_remotes $1 zephyr https://github.com/zephyrproject-rtos/zephyr +add_remotes $1 toybox https://github.com/landley/toybox.git +add_remotes $1 grub https://git.savannah.gnu.org/git/grub.git +add_remotes $1 bluez 
https://git.kernel.org/pub/scm/bluetooth/bluez.git +add_remotes $1 linux https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git \ + https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git \ + https://github.com/bootlin/linux-history.git +add_remotes $1 xen https://xenbits.xen.org/git-http/xen.git +add_remotes $1 freebsd https://git.freebsd.org/src.git + +# Index all projects. +# Note: this is not only the above ones but all the ones in $1. +find $1 -mindepth 1 -maxdepth 1 -type d | \ +while read dir; do + do_index "$dir" +done diff --git a/utils/update-elixir-data b/utils/update-elixir-data deleted file mode 100755 index ad0fcbab..00000000 --- a/utils/update-elixir-data +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/sh - -# This file is part of Elixir, a source code cross-referencer. -# -# Copyright (C) 2019--2020 Michael Opdenacker and contributors -# -# Elixir is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# Elixir is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with Elixir. If not, see . - -if test -z "$ELIXIR_THREADS"; then - ELIXIR_THREADS="$(nproc)" -fi - -if [ -z "$LXR_PROJ_DIR" ]; then - echo "ERROR: LXR_PROJ_DIR environment variable not set" - exit 1 -fi - -for dir_name in $LXR_PROJ_DIR/*; do - echo "Processing project $dir_name ..." 
- export LXR_DATA_DIR=$dir_name/data - export LXR_REPO_DIR=$dir_name/repo - - cd $LXR_REPO_DIR - git fetch --all --tags -j4 - - cd $ELIXIR_INSTALL - ./update.py $ELIXIR_THREADS -done From 122ed225b3ab9bfdb3a4c4ee9cd97552f0e0aec4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Lebrun?= Date: Fri, 8 Nov 2024 12:51:50 +0100 Subject: [PATCH 15/26] utils/index: add init.defaultBranch= config to `git init` call MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Avoid the following Git warning: hint: Using 'master' as the name for the initial branch. This default branch name hint: is subject to change. To configure the initial branch name to use in all hint: of your new repositories, which will suppress this warning, call: hint: hint: git config --global init.defaultBranch <name> hint: hint: Names commonly chosen instead of 'master' are 'main', 'trunk' and hint: 'development'. The just-created branch can be renamed via this command: hint: hint: git branch -m <name> Signed-off-by: Théo Lebrun --- utils/index | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/utils/index b/utils/index index 44437885..47c3cad7 100755 --- a/utils/index +++ b/utils/index @@ -17,8 +17,7 @@ project_init() { mkdir -p $1/data $1/repo - # This doesn't fail if repo already exists - git -C $1/repo init --bare + git -C $1/repo -c init.defaultBranch=main init --bare git config --system --add safe.directory $1/repo } From 6105f40b2e460aa2625f9422e7a28759c8c3fc75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Lebrun?= Date: Fri, 8 Nov 2024 14:38:50 +0100 Subject: [PATCH 16/26] utils: deduplicate pack-repositories into index MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit utils/pack-repositories did the following on repos which have a gc.log file existing (created when GC fails): git prune git gc --aggressive git prune git gc --aggressive Here we: - Delete utils/pack-repositories; we don't want that detection to
be done manually. Instead, we integrate the gc.log detection into utils/index that should be called often. - Create a hidden flag ($ELIXIR_GC) to allow manual trigger. - Replace the above sequence with a simpler `git gc --aggressive`. Let's trust Git. - Do a `git gc --auto` in the default case. This call is automatically done by porcelain commands but we don't run any so let's give Git an opportunity to cleanup from time to time (heuristic based). - Replace the gc.log detection from: find . -name gc.log To: test -e $data/$project/repo/gc.log It should be more reliable. With the first approach we risk projects that contain a file gc.log to trigger the detection on each run. Signed-off-by: Théo Lebrun --- utils/index | 14 +++++++++++- utils/pack-repositories | 48 ----------------------------------------- 2 files changed, 13 insertions(+), 49 deletions(-) delete mode 100755 utils/pack-repositories diff --git a/utils/index b/utils/index index 47c3cad7..576c6d76 100755 --- a/utils/index +++ b/utils/index @@ -44,7 +44,19 @@ project_add_remote() { # $1 is the project path (parent of data/ and repo/). project_fetch() { - git -C $1/repo fetch --all --tags -j4 + git="git -C $1/repo" + + $git fetch --all --tags -j4 + + # A gc.log file implies a garbage collect failed in the past. + # Also, create a hidden flag which could be useful to trigger GCs manually. + if test -e $1/repo/gc.log -o "$ELIXIR_GC"; then + $git gc --aggressive + else + # Otherwise, give Git an occasion to trigger a GC. + # Porcelain commands should trigger that, but we don't use any. + $git gc --auto + fi } # $1 is the project path (parent of data/ and repo/). 
diff --git a/utils/pack-repositories b/utils/pack-repositories deleted file mode 100755 index 7433577c..00000000 --- a/utils/pack-repositories +++ /dev/null @@ -1,48 +0,0 @@ -#!/bin/sh -# -# Goes through all git repositories in the current directory -# and runs "git prune" and "git gc --aggressive" to pack -# objects very efficiently and reduce size sometimes dramatically -# -# Works by finding the repositories which have a gc.log -# file, indicating that git garbage collection failed at some -# point, causing the repository to grow very fast -# each time new objects are fetched. -# -# Example: llvm-project: divided disk usage by 60! - -# This file is part of Elixir, a source code cross-referencer. -# -# Copyright (C) 2019--2020 Michael Opdenacker and contributors -# -# Elixir is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# Elixir is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with Elixir. If not, see <http://www.gnu.org/licenses/>. - -for f in `find . -name gc.log` -do - d=`dirname $f` - echo "Processing: $d" - cd $d - echo -n "Initial size: " - echo `du -sh .` - rm -f gc.log - git prune - git gc --aggressive - echo -n "Size after first pass: " - echo `du -sh .` - git prune - git gc --aggressive - echo -n "Size after second pass: " - echo `du -sh .` - cd ..
-done From f50cb641959b614776711f3190607d686c09a02c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Lebrun?= Date: Fri, 8 Nov 2024 14:51:20 +0100 Subject: [PATCH 17/26] README: remove "Keeping git repository disk usage under control" section MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New script utils/index does an automatic call to `git gc --auto` and if it detects a gc.log file, it runs `git gc --aggressive`. There shouldn't be any reason for people to have to think about that aspect. Remove that info from the README and make it lighter weight. Signed-off-by: Théo Lebrun --- README.adoc | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/README.adoc b/README.adoc index d9c11f75..48d6fd59 100644 --- a/README.adoc +++ b/README.adoc @@ -261,29 +261,6 @@ through a daily cron job. You can set `$ELIXIR_THREADS` if you want to change the number of threads used by update.py for indexing (by default the number of CPUs on your system). -== Keeping git repository disk usage under control - -As you keep updating your git repositories, you may notice that some can become -considerably bigger than they originally were. This seems to happen when a `gc.log` -file appears in a big repository, apparently causing git's garbage collector (`git gc`) -to fail, and therefore causing the repository to consume disk space at a fast -pace every time new objects are fetched. - -When this happens, you can save disk space by packing git directories as follows: - ----- -cd -git prune -rm gc.log -git gc --aggressive ----- - -Actually, a second pass with the above commands will save even more space. - -To process multiple git repositories in a loop, you may use the -`utils/pack-repositories` that we are providing, run from the directory -where all repositories are found. - = Building Docker images Dockerfiles are provided in the `docker/` directory. 
From 5248656486bcfe0bb8e45441a956206bfcd3d3a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Lebrun?= Date: Fri, 8 Nov 2024 15:14:15 +0100 Subject: [PATCH 18/26] utils/index: allow indexing project with remote URLs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, to start an indexing from scratch: ./utils/index /srv/elixir-data musl https://git.musl-libc.org/git/musl This is annoying as the script already has the remote URLs for all known projects. Now, a call without remote will automatically add the remote URLs matching the project name: ./utils/index /srv/elixir-data musl This copies the behavior that was previously only implemented for --all. Signed-off-by: Théo Lebrun --- utils/index | 83 +++++++++++++++++++++++++++++++---------------------- 1 file changed, 49 insertions(+), 34 deletions(-) diff --git a/utils/index b/utils/index index 576c6d76..0647a238 100755 --- a/utils/index +++ b/utils/index @@ -85,6 +85,19 @@ add_remotes() { done } +# Call add_remotes() if no remotes are passed as arguments. +# +# $1 is the Elixir root data path. +# $2 is the CLI arg count. +# $3 is the CLI arg for project name (can be --all). +# $4 is the project name. +# $... are the default remote URLs. +add_default_remotes() { + if test $2 -eq 2 -a \( "$3" = "--all" -o "$3" = "$4" \); then + add_remotes "$1" "$4" ${@:5} + fi +} + do_index() { if test ! "$(find $1/data -type f)"; then # If we are indexing from scratch, do it twice as the initial one @@ -99,44 +112,46 @@ do_index() { fi } +# Add all known projects remotes. 
This works in two cases: +# ./utils/index --all # => Add default remotes for all projects +# ./utils/index musl # => Add default remote for musl +add_default_remotes $1 $# $2 amazon-freertos https://github.com/aws/amazon-freertos.git +add_default_remotes $1 $# $2 arm-trusted-firmware https://github.com/ARM-software/arm-trusted-firmware +add_default_remotes $1 $# $2 barebox https://git.pengutronix.de/git/barebox +add_default_remotes $1 $# $2 busybox https://git.busybox.net/busybox +add_default_remotes $1 $# $2 coreboot https://review.coreboot.org/coreboot.git +add_default_remotes $1 $# $2 dpdk https://dpdk.org/git/dpdk \ + https://dpdk.org/git/dpdk-stable +add_default_remotes $1 $# $2 glibc https://sourceware.org/git/glibc.git +add_default_remotes $1 $# $2 llvm https://github.com/llvm/llvm-project.git +add_default_remotes $1 $# $2 mesa https://gitlab.freedesktop.org/mesa/mesa.git +add_default_remotes $1 $# $2 musl https://git.musl-libc.org/git/musl +add_default_remotes $1 $# $2 ofono https://git.kernel.org/pub/scm/network/ofono/ofono.git +add_default_remotes $1 $# $2 op-tee https://github.com/OP-TEE/optee_os.git +add_default_remotes $1 $# $2 qemu https://gitlab.com/qemu-project/qemu.git +add_default_remotes $1 $# $2 u-boot https://source.denx.de/u-boot/u-boot.git +add_default_remotes $1 $# $2 uclibc-ng https://cgit.uclibc-ng.org/cgi/cgit/uclibc-ng.git +add_default_remotes $1 $# $2 zephyr https://github.com/zephyrproject-rtos/zephyr +add_default_remotes $1 $# $2 toybox https://github.com/landley/toybox.git +add_default_remotes $1 $# $2 grub https://git.savannah.gnu.org/git/grub.git +add_default_remotes $1 $# $2 bluez https://git.kernel.org/pub/scm/bluetooth/bluez.git +add_default_remotes $1 $# $2 linux https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git \ + https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git \ + https://github.com/bootlin/linux-history.git +add_default_remotes $1 $# $2 xen https://xenbits.xen.org/git-http/xen.git 
+add_default_remotes $1 $# $2 freebsd https://git.freebsd.org/src.git + # Index a single project if test "$2" != "--all"; then dir="$1/$2" add_remotes "$@" do_index "$dir" - exit +else + # Index all projects. + # Note: this is not only the default projects ones but all the ones in $1. + find $1 -mindepth 1 -maxdepth 1 -type d | \ + while read dir; do + do_index "$dir" + done fi -# Add all known projects remotes -add_remotes $1 amazon-freertos https://github.com/aws/amazon-freertos.git -add_remotes $1 arm-trusted-firmware https://github.com/ARM-software/arm-trusted-firmware -add_remotes $1 barebox https://git.pengutronix.de/git/barebox -add_remotes $1 busybox https://git.busybox.net/busybox -add_remotes $1 coreboot https://review.coreboot.org/coreboot.git -add_remotes $1 dpdk https://dpdk.org/git/dpdk \ - https://dpdk.org/git/dpdk-stable -add_remotes $1 glibc https://sourceware.org/git/glibc.git -add_remotes $1 llvm https://github.com/llvm/llvm-project.git -add_remotes $1 mesa https://gitlab.freedesktop.org/mesa/mesa.git -add_remotes $1 musl https://git.musl-libc.org/git/musl -add_remotes $1 ofono https://git.kernel.org/pub/scm/network/ofono/ofono.git -add_remotes $1 op-tee https://github.com/OP-TEE/optee_os.git -add_remotes $1 qemu https://gitlab.com/qemu-project/qemu.git -add_remotes $1 u-boot https://source.denx.de/u-boot/u-boot.git -add_remotes $1 uclibc-ng https://cgit.uclibc-ng.org/cgi/cgit/uclibc-ng.git -add_remotes $1 zephyr https://github.com/zephyrproject-rtos/zephyr -add_remotes $1 toybox https://github.com/landley/toybox.git -add_remotes $1 grub https://git.savannah.gnu.org/git/grub.git -add_remotes $1 bluez https://git.kernel.org/pub/scm/bluetooth/bluez.git -add_remotes $1 linux https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git \ - https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git \ - https://github.com/bootlin/linux-history.git -add_remotes $1 xen https://xenbits.xen.org/git-http/xen.git -add_remotes $1 freebsd 
https://git.freebsd.org/src.git - -# Index all projects. -# Note: this is not only the above ones but all the ones in $1. -find $1 -mindepth 1 -maxdepth 1 -type d | \ -while read dir; do - do_index "$dir" -done From bf1dbec65598819dc5b644527918c96478e98606 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Lebrun?= Date: Fri, 8 Nov 2024 15:16:01 +0100 Subject: [PATCH 19/26] utils/index: remove `git config --system --add safe.directory` call MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Stop writing a global file when initializing projects. This can cause permission issues. We instead pass the option manually for each Git process call using: git -c safe.directory=... Signed-off-by: Théo Lebrun --- utils/index | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/utils/index b/utils/index index 0647a238..0f7cc37e 100755 --- a/utils/index +++ b/utils/index @@ -18,14 +18,12 @@ project_init() { mkdir -p $1/data $1/repo git -C $1/repo -c init.defaultBranch=main init --bare - - git config --system --add safe.directory $1/repo } # $1 is the project path (parent of data/ and repo/). # $2 is the remote URL. project_add_remote() { - git="git -C $1/repo" + git="git -C $1/repo -c safe.directory=$1/repo" # Do nothing if remote already exists. if $git remote | xargs -L1 -r $git remote get-url 2>/dev/null | grep -qxF "$2"; then @@ -44,7 +42,7 @@ project_add_remote() { # $1 is the project path (parent of data/ and repo/). project_fetch() { - git="git -C $1/repo" + git="git -C $1/repo -c safe.directory=$1/repo" $git fetch --all --tags -j4 From 4c7f61ac0cc3f7b13d19da3129defe2ea25a4de1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Lebrun?= Date: Fri, 8 Nov 2024 15:23:24 +0100 Subject: [PATCH 20/26] utils/index: remove /usr/local/elixir/update.py absolute path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead, start from $0 and move back up two times. 
So, something like: ./elixir/utils/index ./elixir/utils ./elixir ./elixir/update.py Signed-off-by: Théo Lebrun --- utils/index | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/utils/index b/utils/index index 0f7cc37e..38b4e714 100755 --- a/utils/index +++ b/utils/index @@ -63,8 +63,10 @@ project_index() { ELIXIR_THREADS="$(nproc)" fi + elixir_sources="$(dirname "$(dirname "$0")")" + LXR_REPO_DIR=$1/repo LXR_DATA_DIR=$1/data \ - python3 /usr/local/elixir/update.py $ELIXIR_THREADS + python3 "$elixir_sources/update.py" $ELIXIR_THREADS } # $1 is the Elixir root data path. From d6e1cb1c6f0596725a6920c707b361174729a91a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Lebrun?= Date: Fri, 8 Nov 2024 15:30:54 +0100 Subject: [PATCH 21/26] README: update following utils/* script changes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Théo Lebrun --- README.adoc | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/README.adoc b/README.adoc index 48d6fd59..c8891fd4 100644 --- a/README.adoc +++ b/README.adoc @@ -255,7 +255,7 @@ as a front-end to reduce the load on the server running the Elixir code. == Keeping Elixir databases up to date To keep your Elixir databases up to date and index new versions that are released, -we're proposing to use a script like `utils/update-elixir-data` which is called +we're proposing to use a script like `utils/index /srv/elixir-data --all` which is called through a daily cron job. 
You can set `$ELIXIR_THREADS` if you want to change the number of threads used by @@ -284,8 +284,7 @@ For example, to add the https://musl.libc.org/[musl] repository, run: # docker exec -it -e PYTHONUNBUFFERED=1 elixir-container \ /bin/bash -c 'export "PATH=/usr/local/elixir/venv/bin:$PATH" ; \ - /usr/local/elixir/utils/index-repository \ - musl https://git.musl-libc.org/git/musl' + /usr/local/elixir/utils/index /srv/elixir-data musl' Without PYTHONUNBUFFERED environment variable, update logs may show up with a delay. @@ -294,10 +293,9 @@ Or, to run indexing in a separate container: # docker run -e PYTHONUNBUFFERED=1 -v ./elixir-data/:/srv/elixir-data \ --entrypoint /bin/bash elixir -c \ 'export "PATH=/usr/local/elixir/venv/bin:$PATH" ; \ - /usr/local/elixir/utils/index-repository \ - musl https://git.musl-libc.org/git/musl' + /usr/local/elixir/utils/index /srv/elixir-data musl' -You can also use utils/index-all-repositories to start indexing all officially supported repositories. +You can also use `utils/index /srv/elixir-data --all` to start indexing all officially supported repositories. After indexing is done, Elixir should be available under the following URL on your host: http://172.17.0.2/musl/latest/source @@ -309,7 +307,7 @@ If 172.17.0.2 does not answer, you can check the IP address of the container by == Automatic repository updates The Docker image does not automatically update repositories by itself. -You can, for example, start `utils/update-elixir-data` in the container (or in a separate container, with Elixir data volume/directory mounted) +You can, for example, start `utils/index /srv/elixir-data --all` in the container (or in a separate container, with Elixir data volume/directory mounted) from cron on the host to periodically update repositories. 
== Using Docker image as a development server From 80ca8ace211e07ac390158f818e595decd91e93b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Lebrun?= Date: Fri, 20 Dec 2024 22:23:24 +0100 Subject: [PATCH 22/26] utils/index: force use of bash, we depend on it for ${@:5} syntax MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Théo Lebrun --- utils/index | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/index b/utils/index index 38b4e714..2b99c96b 100755 --- a/utils/index +++ b/utils/index @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash if test $# -lt 2; then echo "Usage: $0 [...]" From e898e29a0fb5150516d5c90aa87425b75ac035dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Lebrun?= Date: Fri, 20 Dec 2024 22:23:49 +0100 Subject: [PATCH 23/26] utils/index: avoid passing argument to test(1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Théo Lebrun --- utils/index | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/index b/utils/index index 2b99c96b..6e84a3e7 100755 --- a/utils/index +++ b/utils/index @@ -142,7 +142,7 @@ add_default_remotes $1 $# $2 xen https://xenbits.xen.org/git-http/xen.git add_default_remotes $1 $# $2 freebsd https://git.freebsd.org/src.git # Index a single project -if test "$2" != "--all"; then +if test "x$2" != "x--all"; then dir="$1/$2" add_remotes "$@" do_index "$dir" From 6af146b64a5d55939f5115aedc1d6726edb72766 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Lebrun?= Date: Sat, 21 Dec 2024 17:21:00 +0100 Subject: [PATCH 24/26] Dockerfile: add virtualenv to $PATH by default MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Théo Lebrun --- README.adoc | 8 +++----- docker/Dockerfile | 1 + 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/README.adoc b/README.adoc index c8891fd4..e5c8b4d5 100644 --- a/README.adoc +++ 
b/README.adoc @@ -283,17 +283,15 @@ To index a repository, you can use the `index-repository` script. For example, to add the https://musl.libc.org/[musl] repository, run: # docker exec -it -e PYTHONUNBUFFERED=1 elixir-container \ - /bin/bash -c 'export "PATH=/usr/local/elixir/venv/bin:$PATH" ; \ - /usr/local/elixir/utils/index /srv/elixir-data musl' + /usr/local/elixir/utils/index -c '/srv/elixir-data musl' Without PYTHONUNBUFFERED environment variable, update logs may show up with a delay. Or, to run indexing in a separate container: # docker run -e PYTHONUNBUFFERED=1 -v ./elixir-data/:/srv/elixir-data \ - --entrypoint /bin/bash elixir -c \ - 'export "PATH=/usr/local/elixir/venv/bin:$PATH" ; \ - /usr/local/elixir/utils/index /srv/elixir-data musl' + --entrypoint /usr/local/elixir/utils/index elixir -c \ + '/srv/elixir-data musl' You can also use `utils/index /srv/elixir-data --all` to start indexing all officially supported repositories. diff --git a/docker/Dockerfile b/docker/Dockerfile index 2e5180f0..056dd3c6 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -57,5 +57,6 @@ ARG ELIXIR_VERSION ENV ELIXIR_VERSION=$ELIXIR_VERSION ENV ELIXIR_ROOT=/srv/elixir-data +ENV PATH="/usr/local/elixir/venv/bin:$PATH" ENTRYPOINT ["/usr/sbin/apache2ctl", "-D", "FOREGROUND"] From c3eabc173ade0f7f7fb9360c546a8991fa1dde41 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Lebrun?= Date: Sat, 21 Dec 2024 17:23:37 +0100 Subject: [PATCH 25/26] Dockerfile: set PYTHONUNBUFFERED=1 by default MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This changes the stdout/stderr buffering behavior of Python. Without it, indexing scripts don't stream updates and use really big buffers. 
Signed-off-by: Théo Lebrun --- docker/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/Dockerfile b/docker/Dockerfile index 056dd3c6..dcbd2027 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -58,5 +58,6 @@ ENV ELIXIR_VERSION=$ELIXIR_VERSION ENV ELIXIR_ROOT=/srv/elixir-data ENV PATH="/usr/local/elixir/venv/bin:$PATH" +ENV PYTHONUNBUFFERED=1 ENTRYPOINT ["/usr/sbin/apache2ctl", "-D", "FOREGROUND"] From d207b1116e82e372c18051ee96845cfa2164de10 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Lebrun?= Date: Sat, 21 Dec 2024 17:24:52 +0100 Subject: [PATCH 26/26] Dockerfile: add utils/ in $PATH by default, for easy indexing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Théo Lebrun --- README.adoc | 16 +++++++--------- docker/Dockerfile | 1 + 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/README.adoc b/README.adoc index e5c8b4d5..88cb3c5d 100644 --- a/README.adoc +++ b/README.adoc @@ -255,7 +255,7 @@ as a front-end to reduce the load on the server running the Elixir code. == Keeping Elixir databases up to date To keep your Elixir databases up to date and index new versions that are released, -we're proposing to use a script like `utils/index /srv/elixir-data --all` which is called +we're proposing to use a script like `index /srv/elixir-data --all` which is called through a daily cron job. You can set `$ELIXIR_THREADS` if you want to change the number of threads used by @@ -282,18 +282,16 @@ The Docker image does not contain any repositories. To index a repository, you can use the `index-repository` script. For example, to add the https://musl.libc.org/[musl] repository, run: - # docker exec -it -e PYTHONUNBUFFERED=1 elixir-container \ - /usr/local/elixir/utils/index -c '/srv/elixir-data musl' - -Without PYTHONUNBUFFERED environment variable, update logs may show up with a delay. 
+ # docker exec -it elixir-container \ + index /srv/elixir-data musl Or, to run indexing in a separate container: - # docker run -e PYTHONUNBUFFERED=1 -v ./elixir-data/:/srv/elixir-data \ - --entrypoint /usr/local/elixir/utils/index elixir -c \ + # docker run -v ./elixir-data/:/srv/elixir-data \ + --entrypoint index elixir \ - '/srv/elixir-data musl' + /srv/elixir-data musl -You can also use `utils/index /srv/elixir-data --all` to start indexing all officially supported repositories. +You can also use `index /srv/elixir-data --all` to start indexing all officially supported repositories. After indexing is done, Elixir should be available under the following URL on your host: http://172.17.0.2/musl/latest/source @@ -305,7 +303,7 @@ If 172.17.0.2 does not answer, you can check the IP address of the container by == Automatic repository updates The Docker image does not automatically update repositories by itself. -You can, for example, start `utils/index /srv/elixir-data --all` in the container (or in a separate container, with Elixir data volume/directory mounted) +You can, for example, start `index /srv/elixir-data --all` in the container (or in a separate container, with Elixir data volume/directory mounted) from cron on the host to periodically update repositories. == Using Docker image as a development server diff --git a/docker/Dockerfile b/docker/Dockerfile index dcbd2027..0f494088 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -59,5 +59,6 @@ ENV ELIXIR_VERSION=$ELIXIR_VERSION ENV ELIXIR_ROOT=/srv/elixir-data ENV PATH="/usr/local/elixir/venv/bin:$PATH" ENV PYTHONUNBUFFERED=1 +ENV PATH="/usr/local/elixir/utils:$PATH" ENTRYPOINT ["/usr/sbin/apache2ctl", "-D", "FOREGROUND"]