Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Integration tests for windows service support #3733

Merged
merged 11 commits into from
Jan 19, 2023
49 changes: 46 additions & 3 deletions pkg/common/entrypoint/entrypoint_windows.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,13 @@ package entrypoint

import (
"context"
"errors"
"fmt"
"os"
"strings"
"unsafe"

"golang.org/x/sys/windows"

"golang.org/x/sys/windows/svc"
)
Expand All @@ -20,7 +25,10 @@ type systemCall struct {
}

func (s *systemCall) IsWindowsService() (bool, error) {
return svc.IsWindowsService()
// We are using a custom function because the svc.IsWindowsService() one still has an open issue in which it states
// that it is not working properly in Windows containers: https://github.com/golang/go/issues/56335. Soon as we have
Copy link
Contributor

@faisal-memon faisal-memon Jan 10, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it worth submitting a PR? That issue has a help wanted tag and it seems like you have a fix below.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A PR is already opened for this, in fact, the custom function with the workaround is based on the opened solution proposal: golang/sys#141

// a fix for that, we can use the original function.
return isWindowsService()
}

func (s *systemCall) Run(name string, handler svc.Handler) error {
Expand Down Expand Up @@ -51,11 +59,11 @@ func (e *EntryPoint) Main() int {
// Determining if SPIRE is running as a Windows service is done
// with a best-effort approach. If there is an error, just fallback
// to the behavior of not running as a Windows service.
isWindowsService, err := e.sc.IsWindowsService()
isWindowsSvc, err := e.sc.IsWindowsService()
if err != nil {
fmt.Fprintf(os.Stderr, "Could not determine if running as a Windows service: %v", err)
}
if isWindowsService {
if isWindowsSvc {
errChan := make(chan error)
go func() {
// Since the service runs in its own process, the service name is ignored.
Expand All @@ -71,3 +79,38 @@ func (e *EntryPoint) Main() int {

return e.runCmdFn(context.Background(), os.Args[1:])
}

// isWindowsService is a copy of the svc.IsWindowsService() function, but without the parentProcess.SessionID == 0 check
// that is causing the issue in Windows containers, this logic is exactly the same from .NET runtime (>= 6.0.10).
func isWindowsService() (bool, error) {
// The below technique looks a bit hairy, but it's actually
// exactly what the .NET runtime (>= 6.0.10) does for the similarly named function:
// https://github.com/dotnet/runtime/blob/36bf84fc4a89209f4fdbc1fc201e81afd8be49b0/src/libraries/Microsoft.Extensions.Hosting.WindowsServices/src/WindowsServiceHelpers.cs#L20-L33
// Specifically, it looks up whether the parent process is called "services".

var currentProcess windows.PROCESS_BASIC_INFORMATION
infoSize := uint32(unsafe.Sizeof(currentProcess))
err := windows.NtQueryInformationProcess(windows.CurrentProcess(), windows.ProcessBasicInformation, unsafe.Pointer(&currentProcess), infoSize, &infoSize)
if err != nil {
return false, err
}
var parentProcess *windows.SYSTEM_PROCESS_INFORMATION
for infoSize = uint32((unsafe.Sizeof(*parentProcess) + unsafe.Sizeof(uintptr(0))) * 1024); ; {
parentProcess = (*windows.SYSTEM_PROCESS_INFORMATION)(unsafe.Pointer(&make([]byte, infoSize)[0]))
err = windows.NtQuerySystemInformation(windows.SystemProcessInformation, unsafe.Pointer(parentProcess), infoSize, &infoSize)
if err == nil {
break
} else if !errors.Is(err, windows.STATUS_INFO_LENGTH_MISMATCH) {
return false, err
}
}
for ; ; parentProcess = (*windows.SYSTEM_PROCESS_INFORMATION)(unsafe.Pointer(uintptr(unsafe.Pointer(parentProcess)) + uintptr(parentProcess.NextEntryOffset))) {
if parentProcess.UniqueProcessID == currentProcess.InheritedFromUniqueProcessId {
return strings.EqualFold("services.exe", parentProcess.ImageName.String()), nil
}
if parentProcess.NextEntryOffset == 0 {
break
}
}
return false, nil
}
1 change: 1 addition & 0 deletions test/integration/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -89,3 +89,4 @@ The following environment variables are available to the teardown script:
* [Self Test](suites/self-test/README.md)
* [SPIRE Server CLI](suites/spire-server-cli/README.md)
* [Upgrade](suites/upgrade/README.md)
* [Windows Service](suites-windows/windows-service/README.md)
6 changes: 6 additions & 0 deletions test/integration/suites-windows/windows-service/00-setup.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/bin/bash

pwd
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why pwd is required?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure if it is actually required, I took it from another test sample (windows workload attestor). I'm going to check if it is really necessary.

"${ROOTDIR}/setup/x509pop/setup.sh" conf/server conf/agent

docker build --target spire-base -t spire-base .
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#!/bin/bash
source ./common

docker-up spire-base

create-service spire-server C:\\spire\\bin\\spire-server.exe
start-service spire-server run -config C:/spire/conf/server/server.conf
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is it possible to be consistent and use \\ or / in all places?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure! nice suggestion

assert-service-status spire-server RUNNING
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#!/bin/bash

log-debug "bootstrapping agent..."
docker-compose exec -T spire-base \
c:/spire/bin/spire-server bundle show > conf/agent/bootstrap.crt || fail-now "failed to bootstrap agent"
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/bin/bash
source ./common

create-service spire-agent C:\\spire\\bin\\spire-agent.exe
start-service spire-agent run -config C:/spire/conf/agent/agent.conf
assert-service-status spire-agent RUNNING
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#!/bin/bash
source ./common

log-debug "creating regular registration entry..."
docker-compose exec -T spire-base \
c:/spire/bin/spire-server entry create \
-parentID "spiffe://domain.test/spire/agent/x509pop/$(fingerprint conf/agent/agent.crt.pem)" \
-spiffeID "spiffe://domain.test/workload" \
-selector "windows:user_name:User Manager\ContainerUser" \
-ttl 0

assert-synced-entry "spiffe://domain.test/workload"
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#!/bin/bash

log-debug "test fetch x509 SVID..."
docker-compose exec -T -u ContainerUser spire-base \
c:/spire/bin/spire-agent api fetch x509 || fail-now "failed to fetch x509"

log-debug "test fetch JWT SVID..."
docker-compose exec -T -u ContainerUser spire-base \
c:/spire/bin/spire-agent api fetch jwt -audience mydb fail-now "failed to fetch x509"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
docker-compose exec -T -u ContainerUser spire-base \
c:/spire/bin/spire-agent api fetch jwt -audience mydb fail-now "failed to fetch x509"
docker-compose exec -T -u ContainerUser spire-base \
c:/spire/bin/spire-agent api fetch jwt -audience mydb || fail-now "failed to fetch JWT"

Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#!/bin/bash
source ./common

stop-service spire-agent
assert-service-status spire-agent STOPPED
assert-graceful-shutdown agent

stop-service spire-server
assert-service-status spire-server STOPPED
assert-graceful-shutdown server
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#!/bin/bash
source ./common

start-service spire-server run -config invalid-config-path
assert-service-status spire-server STOPPED

start-service spire-agent run -config invalid-config-path
assert-service-status spire-agent STOPPED
7 changes: 7 additions & 0 deletions test/integration/suites-windows/windows-service/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
FROM spire-agent-windows:latest-local as spire-agent-windows

FROM spire-server-windows:latest-local as spire-base

COPY --from=spire-agent-windows C:/spire/bin/spire-agent.exe C:/spire/bin/spire-agent.exe
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why you choose to use the same container to start both services?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So, in this initial version, I tried to reproduce a test that I ran manually on my local machine (spire server and agent on the same host). Still, I see that a more realistic scenario would be running them in different hosts, right? I can update it :)

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah that is 'generally' the case


ENTRYPOINT [ "cmd" ]
19 changes: 19 additions & 0 deletions test/integration/suites-windows/windows-service/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# SPIRE Server CLI Suite

## Description

This suite validates that we can run both spire agent and spire server natively on Windows OS, asserting that spire components
can run as a [windows service application](https://learn.microsoft.com/en-us/dotnet/framework/windows-services/introduction-to-windows-service-applications#service-applications-vs-other-visual-studio-applications),
and perform [service state transitions](https://learn.microsoft.com/en-us/windows/win32/services/service-status-transitions).

The suite steps are structured as follows:

1. Spire server and agent are installed as Windows services.
2. Spire server and agent services starts, their respective status is asserted as **_RUNNING_**, and the node attestation
is performed with x509pop.
3. Workload registration entries are created.
4. The feature of fetching SVIDs (x509 and JWT) is asserted with the running spire agent service.
5. Spire server and agent services are stopped, their respective status is asserted as **_STOPPED_**, and graceful
shutdown is verified via application logs.
6. Spire server and agent services are started again, but this time with an invalid config; their respective status is
asserted as **_STOPPED_**.
66 changes: 66 additions & 0 deletions test/integration/suites-windows/windows-service/common
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
#!/bin/bash

assert-synced-entry() {
# Check at most 30 times (with one second in between) that the agent has
# successfully synced down the workload entry.
MAXCHECKS=30
CHECKINTERVAL=1
for ((i=1;i<=MAXCHECKS;i++)); do
log-info "checking for synced entry ($i of $MAXCHECKS max)..."
if grep -wq "$1" conf/agent/logs.txt; then
return 0
fi
sleep "${CHECKINTERVAL}"
done

fail-now "timed out waiting for agent to sync down entry"
}

assert-service-status() {
MAXCHECKS=10
CHECKINTERVAL=1
for ((i=1;i<=MAXCHECKS;i++)); do
log-info "checking for $1 service $2 ($i of $MAXCHECKS max)..."
scCommand=$([ "$2" == "STOPPED" ] && echo "query" || echo "interrogate")
if docker-compose exec -T -u ContainerAdministrator spire-base sc "$scCommand" "$1" | grep -wq "$2"; then
log-info "$1 is in $2 state"
return 0
fi
sleep "${CHECKINTERVAL}"
done

fail-now "$1 service failed to reach $2 state"
}

assert-graceful-shutdown() {
MAXCHECKS=10
CHECKINTERVAL=1
for ((i=1;i<=MAXCHECKS;i++)); do
log-info "checking for graceful shutdown ($i of $MAXCHECKS max)..."
if grep -wq "stopped gracefully" conf/"$1"/logs.txt; then
log-info "$1 stopped gracefully"
return 0
fi
sleep "${CHECKINTERVAL}"
done

fail-now "timed out waiting for $1 graceful shutdown"
}

create-service() {
log-info "creating $1 service..."
docker-compose exec -T -u ContainerAdministrator spire-base \
sc create "$1" binPath="$2" || grep "STOPPED" fail-now "failed to create $1 service"
}

stop-service() {
log-info "stopping $1 service..."
docker-compose exec -T -u ContainerAdministrator spire-base \
sc stop "$1" || fail-now "failed to stop $1 service"
}

start-service(){
log-info "starting $1 service..."
docker-compose exec -T -u ContainerAdministrator spire-base \
sc start "$@" | grep -wq "START_PENDING" || fail-now "failed to start $1 service"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
agent {
data_dir = "c:/spire/data/agent"
log_level = "DEBUG"
server_address = "127.0.0.1"
log_file ="c:/spire/conf/agent/logs.txt"
server_port = "8081"
trust_bundle_path = "c:/spire/conf/agent/bootstrap.crt"
trust_domain = "domain.test"
}

plugins {
NodeAttestor "x509pop" {
plugin_data {
private_key_path = "c:/spire/conf/agent/agent.key.pem"
certificate_path = "c:/spire/conf/agent/agent.crt.pem"
}
}
KeyManager "disk" {
plugin_data {
directory = "c:/spire/data/agent"
}
}
WorkloadAttestor "windows" {
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
server {
bind_address = "127.0.0.1"
bind_port = "8081"
trust_domain = "domain.test"
log_file ="c:/spire/conf/server/logs.txt"
data_dir = "c:/spire/data/server"
log_level = "DEBUG"
}

plugins {
DataStore "sql" {
plugin_data {
database_type = "sqlite3"
connection_string = "c:/spire/data/server/datastore.sqlite3"
}
}
NodeAttestor "x509pop" {
plugin_data {
ca_bundle_path = "c:/spire/conf/server/agent-cacert.pem"
}
}
KeyManager "memory" {
plugin_data = {}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
version: '3'
services:
spire-base:
image: spire-base:latest
volumes:
- ./conf/server:c:/spire/conf/server
- ./conf/agent:c:/spire/conf/agent
user: ContainerAdministrator
command:
- cmd /c ping -t localhost > NUL
6 changes: 6 additions & 0 deletions test/integration/suites-windows/windows-service/teardown
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/bin/bash

if [ -z "$SUCCESS" ]; then
docker-compose logs
fi
docker-down