From f5db2f2167651a308481a9a7f0d1aad982683c93 Mon Sep 17 00:00:00 2001 From: Bernhard Herzog Date: Fri, 6 Dec 2024 17:02:39 +0100 Subject: [PATCH 01/13] Make fromSchema return the type name so that callers don't have to guess --- pkg/fakedoc/template.go | 39 ++++++++++++++++++++++----------------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/pkg/fakedoc/template.go b/pkg/fakedoc/template.go index a6a67ed..858b247 100644 --- a/pkg/fakedoc/template.go +++ b/pkg/fakedoc/template.go @@ -395,15 +395,17 @@ func FromCSAFSchema() (*Template, error) { func FromSchema(schema *jsonschema.Schema) (*Template, error) { template := &Template{ Types: make(map[string]TmplNode), - Root: ShortLocation(schema), + Root: "", } - if err := template.fromSchema(schema); err != nil { + root, err := template.fromSchema(schema) + if err != nil { return nil, err } + template.Root = root return template, nil } -func (t *Template) fromSchema(schema *jsonschema.Schema) error { +func (t *Template) fromSchema(schema *jsonschema.Schema) (string, error) { name := ShortLocation(schema) // Check for recursion. If name is already in t.Types, we don't have @@ -411,13 +413,13 @@ func (t *Template) fromSchema(schema *jsonschema.Schema) error { // building the node, otherwise the node has already been // contstructed. 
if _, ok := t.Types[name]; ok { - return nil + return name, nil } t.Types[name] = nil ty, tschema, err := getType(schema) if err != nil { - return err + return "", err } switch ty { case "object": @@ -428,12 +430,13 @@ func (t *Template) fromSchema(schema *jsonschema.Schema) error { properties := []*Property{} for propName, prop := range tschema.Properties { - if err := t.fromSchema(prop); err != nil { - return err + propType, err := t.fromSchema(prop) + if err != nil { + return "", err } properties = append(properties, &Property{ Name: propName, - Type: ShortLocation(prop), + Type: propType, Required: required[propName], }) } @@ -448,11 +451,12 @@ func (t *Template) fromSchema(schema *jsonschema.Schema) error { MaxProperties: tschema.MaxProperties, } case "array": - if err := t.fromSchema(tschema.Items2020); err != nil { - return err + itemsType, err := t.fromSchema(tschema.Items2020) + if err != nil { + return "", err } t.Types[name] = &TmplArray{ - Items: ShortLocation(tschema.Items2020), + Items: itemsType, MinItems: tschema.MinItems, MaxItems: tschema.MaxItems, UniqueItems: tschema.UniqueItems, @@ -460,10 +464,11 @@ func (t *Template) fromSchema(schema *jsonschema.Schema) error { case "oneof": oneof := []string{} for _, alternative := range tschema.OneOf { - if err := t.fromSchema(alternative); err != nil { - return err + altType, err := t.fromSchema(alternative) + if err != nil { + return "", err } - oneof = append(oneof, ShortLocation(alternative)) + oneof = append(oneof, altType) } t.Types[name] = &TmplOneOf{OneOf: oneof} case "string": @@ -489,7 +494,7 @@ func (t *Template) fromSchema(schema *jsonschema.Schema) error { if regexp != "" { pattern, err = CompileRegexp(regexp) if err != nil { - return nil + return "", err } } @@ -515,9 +520,9 @@ func (t *Template) fromSchema(schema *jsonschema.Schema) error { Maximum: maximum, } default: - return fmt.Errorf("unexpected type: %s", ty) + return "", fmt.Errorf("unexpected type: %s", ty) } - return nil + return 
name, nil } func getType(schema *jsonschema.Schema) (string, *jsonschema.Schema, error) { From 69651b1438860d0b7dacb58718f79974c1233d1e Mon Sep 17 00:00:00 2001 From: Bernhard Herzog Date: Fri, 6 Dec 2024 17:45:29 +0100 Subject: [PATCH 02/13] Make schemas with the same underlying type use the same type name When deriving the template from the schema, some schemas have the same underlying type, because they both refer to the same type. So far, the code correctly used the definition of that shared type, but used different names when putting it into the template's Types map. With this change, that name is derived from the shared underlying type. This leads to more sharing of types which makes it easier to deal with special handling of e.g. the product_id type which is referenced in many places. --- pkg/fakedoc/template.go | 55 +++++++++++++++++++++-------------------- 1 file changed, 28 insertions(+), 27 deletions(-) diff --git a/pkg/fakedoc/template.go b/pkg/fakedoc/template.go index 858b247..8b1bec5 100644 --- a/pkg/fakedoc/template.go +++ b/pkg/fakedoc/template.go @@ -405,31 +405,32 @@ func FromSchema(schema *jsonschema.Schema) (*Template, error) { return template, nil } -func (t *Template) fromSchema(schema *jsonschema.Schema) (string, error) { +func (t *Template) fromSchema(origschema *jsonschema.Schema) (string, error) { + ty, schema, err := getType(origschema) + if err != nil { + return "", err + } + name := ShortLocation(schema) // Check for recursion. If name is already in t.Types, we don't have // to do anything. If the associated value is nil, we're currently // building the node, otherwise the node has already been - // contstructed. + // constructed. 
if _, ok := t.Types[name]; ok { return name, nil } t.Types[name] = nil - ty, tschema, err := getType(schema) - if err != nil { - return "", err - } switch ty { case "object": - required := make(map[string]bool, len(tschema.Required)) - for _, name := range tschema.Required { + required := make(map[string]bool, len(schema.Required)) + for _, name := range schema.Required { required[name] = true } properties := []*Property{} - for propName, prop := range tschema.Properties { + for propName, prop := range schema.Properties { propType, err := t.fromSchema(prop) if err != nil { return "", err @@ -447,23 +448,23 @@ func (t *Template) fromSchema(schema *jsonschema.Schema) (string, error) { t.Types[name] = &TmplObject{ Properties: properties, - MinProperties: tschema.MinProperties, - MaxProperties: tschema.MaxProperties, + MinProperties: schema.MinProperties, + MaxProperties: schema.MaxProperties, } case "array": - itemsType, err := t.fromSchema(tschema.Items2020) + itemsType, err := t.fromSchema(schema.Items2020) if err != nil { return "", err } t.Types[name] = &TmplArray{ Items: itemsType, - MinItems: tschema.MinItems, - MaxItems: tschema.MaxItems, - UniqueItems: tschema.UniqueItems, + MinItems: schema.MinItems, + MaxItems: schema.MaxItems, + UniqueItems: schema.UniqueItems, } case "oneof": oneof := []string{} - for _, alternative := range tschema.OneOf { + for _, alternative := range schema.OneOf { altType, err := t.fromSchema(alternative) if err != nil { return "", err @@ -472,21 +473,21 @@ func (t *Template) fromSchema(schema *jsonschema.Schema) (string, error) { } t.Types[name] = &TmplOneOf{OneOf: oneof} case "string": - switch tschema.Format { + switch schema.Format { case "date-time": mindate := time.Date(2020, 1, 1, 0, 0, 0, 0, time.UTC) maxdate := time.Date(2025, 1, 1, 0, 0, 0, 0, time.UTC) t.Types[name] = &TmplDateTime{Minimum: &mindate, Maximum: &maxdate} default: enum := []string{} - for _, v := range tschema.Enum { + for _, v := range schema.Enum { enum = 
append(enum, v.(string)) } regexp := "" - if tschema.Pattern != nil { - regexp = tschema.Pattern.String() + if schema.Pattern != nil { + regexp = schema.Pattern.String() } - if tschema.Format == "uri" && regexp == "" { + if schema.Format == "uri" && regexp == "" { regexp = uriRegexp } @@ -499,20 +500,20 @@ func (t *Template) fromSchema(schema *jsonschema.Schema) (string, error) { } t.Types[name] = &TmplString{ - MinLength: tschema.MinLength, - MaxLength: tschema.MaxLength, + MinLength: schema.MinLength, + MaxLength: schema.MaxLength, Enum: enum, Pattern: pattern, } } case "number": var minimum, maximum *float32 - if tschema.Minimum != nil { - m, _ := tschema.Minimum.Float32() + if schema.Minimum != nil { + m, _ := schema.Minimum.Float32() minimum = &m } - if tschema.Maximum != nil { - m, _ := tschema.Maximum.Float32() + if schema.Maximum != nil { + m, _ := schema.Maximum.Float32() maximum = &m } t.Types[name] = &TmplNumber{ From ba48fdb596d75e6fde4a699a2698c33a1e7b674d Mon Sep 17 00:00:00 2001 From: Bernhard Herzog Date: Fri, 6 Dec 2024 19:26:54 +0100 Subject: [PATCH 03/13] Implement abandoning of subtrees of the generated tree more robustly During the construction of the tree for the random CSAF document, it can happen that a particular branch cannot be fully constructed, because some constraint cannot be met. The main reason for that so far is that the maximum depth would be exceeded. In this case the generateNode method returns an error that indicates this and the generator will try to recover from this by trying something else, like not generating the object property where the problem occurred and trying to add a different one. The implementation of product IDs will add another reason for pruning a branch, so we introduce an error value that forms the basis for all reasons for pruning. We also now handle these errors in more places, even though those are currently not places where these errors can occur for the built-in template. 
--- pkg/fakedoc/generator.go | 44 +++++++++++++++++++++++++++++++--------- pkg/fakedoc/random.go | 8 ++++++++ 2 files changed, 42 insertions(+), 10 deletions(-) diff --git a/pkg/fakedoc/generator.go b/pkg/fakedoc/generator.go index 5f024e7..43d881f 100644 --- a/pkg/fakedoc/generator.go +++ b/pkg/fakedoc/generator.go @@ -24,11 +24,18 @@ import ( "github.com/go-loremipsum/loremipsum" ) +// ErrBranchAbandoned is the base errors that indicate that the +// generator should abandon a recursive descent and try again with a +// different branch. +// +// This error is mostly used internally in the generator and is unlikely +// to be returned from Generate method. +var ErrBranchAbandoned = errors.New("branch abandoned") + // ErrDepthExceeded is returned as error by the generator if exceeding // the maximum depth of the generated document could not be avoided. -// This is mostly used internally in the generator and is unlikely to be -// returned from Generate method. -var ErrDepthExceeded = errors.New("maximum depth exceeded") +// It is based on ErrBranchAbandoned +var ErrDepthExceeded = fmt.Errorf("%w: maximum depth exceeded", ErrBranchAbandoned) // ErrNoValidValue is returned as error by the generator if no value // that conforms to the constraints given in the template could be @@ -83,8 +90,7 @@ func (gen *Generator) generateNode(typename string, depth int) (any, error) { case *TmplArray: return gen.randomArray(node, depth) case *TmplOneOf: - typename := choose(gen.Rand, node.OneOf) - return gen.generateNode(typename, depth-1) + return gen.randomOneOf(node.OneOf, depth) case *TmplString: if len(node.Enum) > 0 { return choose(gen.Rand, node.Enum), nil @@ -201,6 +207,24 @@ generateItem: return item, nil } +func (gen *Generator) randomOneOf(oneof []string, depth int) (any, error) { + shuffled := shuffle(gen.Rand, oneof) + var abandoned error + for _, typename := range shuffled { + value, err := gen.generateNode(typename, depth-1) + if errors.Is(err, ErrBranchAbandoned) { + 
abandoned = err + continue + } + return value, err + } + + if abandoned != nil { + return nil, abandoned + } + return nil, fmt.Errorf("could not generate any of %v", oneof) +} + func (gen *Generator) generateObject(node *TmplObject, depth int) (any, error) { properties := make(map[string]any) optional := make([]*Property, 0, len(node.Properties)) @@ -244,15 +268,15 @@ func (gen *Generator) generateObject(node *TmplObject, depth int) (any, error) { // try. Generating a property may fail because the maximum depth // would be exceeded in which case we just try again with a // different property. - depthExceeded := false + var branchAbandoned error for extraProps > 0 && len(optional) > 0 { i := gen.Rand.IntN(len(optional)) prop := optional[i] optional = slices.Delete(optional, i, i+1) value, err := gen.generateNode(prop.Type, depth-1) switch { - case errors.Is(err, ErrDepthExceeded): - depthExceeded = true + case errors.Is(err, ErrBranchAbandoned): + branchAbandoned = err continue case err != nil: return nil, err @@ -266,8 +290,8 @@ func (gen *Generator) generateObject(node *TmplObject, depth int) (any, error) { // failure is due to exceeding the maximum depth we report that to // the caller so that it can try something else. 
if len(properties) < minProps { - if depthExceeded { - return nil, ErrDepthExceeded + if branchAbandoned != nil { + return nil, branchAbandoned } return nil, fmt.Errorf("could not generate at least %d properties", minProps) } diff --git a/pkg/fakedoc/random.go b/pkg/fakedoc/random.go index 2b60512..a703119 100644 --- a/pkg/fakedoc/random.go +++ b/pkg/fakedoc/random.go @@ -21,6 +21,14 @@ func choose[T any](rand *rand.Rand, choices []T) T { return choices[rand.IntN(len(choices))] } +// shuffle randomly shuffles a slice +func shuffle[T any](rand *rand.Rand, ts []T) []T { + rand.Shuffle(len(ts), func(i, j int) { + ts[i], ts[j] = ts[j], ts[i] + }) + return ts +} + // ErrSeedFormat is the error returned by ParseSeed for incorrectly // formatted seed values. var ErrSeedFormat = errors.New( From 28fb6c3eae22730eba47a15d3fea7431862a91d7 Mon Sep 17 00:00:00 2001 From: Bernhard Herzog Date: Fri, 6 Dec 2024 21:47:10 +0100 Subject: [PATCH 04/13] Better handling of generation failures of arrays with uniqueitems=true We can treat those cases like depth exceeded and simply not generate the array at all and let the parent try to do something else. --- pkg/fakedoc/generator.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pkg/fakedoc/generator.go b/pkg/fakedoc/generator.go index 43d881f..ed8f6aa 100644 --- a/pkg/fakedoc/generator.go +++ b/pkg/fakedoc/generator.go @@ -42,7 +42,8 @@ var ErrDepthExceeded = fmt.Errorf("%w: maximum depth exceeded", ErrBranchAbandon // generated. This can happen for arrays where UniqueItems is true, for // instance, if the minimum number of items is large compared to number // of different valid items. -var ErrNoValidValue = errors.New("could not generate valid value") +// It is based on ErrBranchAbandoned +var ErrNoValidValue = fmt.Errorf("%w: could not generate valid value", ErrBranchAbandoned) // ErrInvalidString is returned as error by the generator if the input // text is not valid UTF-8. 
This can happen if the input is a binary From 3cfafab40031ab7fe4b4dec7d7c7cf7928760a52 Mon Sep 17 00:00:00 2001 From: Bernhard Herzog Date: Fri, 6 Dec 2024 21:48:22 +0100 Subject: [PATCH 05/13] Implement product IDs and references to them The product tree contains products that are identified with IDs that may be referenced from elsewhere in the document. Those references must be generated in such a way that they point to existing products. With this commit the generator can do that, although at the moment it doesn't work well enough yet. In about 2.5% of all attempts, no document will be generated at all. The basic approach is to have two new template types, one to generate IDs and another for places that reference them. During generation references are only created at all if at least one ID has been generated and they're initially added with a placeholder. Once the full tree has been generated, the placeholders are filled with IDs randomly chosen from all that were generated. Because this will have to be done with other kinds of IDs, such as group IDs, the new template types have a namespace parameter that is used to distinguish them. 
--- pkg/fakedoc/generator.go | 117 ++++++++++++++++++++++++++++++++++++--- pkg/fakedoc/template.go | 108 ++++++++++++++++++++++++++++++++++++ 2 files changed, 218 insertions(+), 7 deletions(-) diff --git a/pkg/fakedoc/generator.go b/pkg/fakedoc/generator.go index ed8f6aa..d9a4679 100644 --- a/pkg/fakedoc/generator.go +++ b/pkg/fakedoc/generator.go @@ -9,6 +9,7 @@ package fakedoc import ( + "encoding/json" "errors" "fmt" "io" @@ -52,9 +53,38 @@ var ErrInvalidString = errors.New("not valid utf-8") // Generator is the type of CSAF document generators type Generator struct { - Template *Template - Rand *rand.Rand - FileCache map[string]string + Template *Template + Rand *rand.Rand + FileCache map[string]string + NameSpaces map[string]*NameSpace +} + +// NameSpace helps implement TmplID and TmplRef by collecting the IDs +// and references for a name space. It holds both values and references +// so that the references can be set to actually existing IDs once all +// IDs have been generated +type NameSpace struct { + Values []string + Refs []*reference +} + +func (ns *NameSpace) addValue(v string) { + ns.Values = append(ns.Values, v) +} + +func (ns *NameSpace) addRef(r *reference) { + ns.Refs = append(ns.Refs, r) +} + +// reference is the value of a node created for TmplRef during +// generation. It's serialized to JSON as a JSON string. 
+type reference struct { + namespace string + value string +} + +func (ref *reference) MarshalJSON() ([]byte, error) { + return json.Marshal(ref.value) } // NewGenerator creates a new Generator based on a Template and an @@ -65,15 +95,46 @@ func NewGenerator(tmpl *Template, rng *rand.Rand) *Generator { rng = rand.New(rand.NewPCG(rand.Uint64(), rand.Uint64())) } return &Generator{ - Template: tmpl, - Rand: rng, - FileCache: make(map[string]string), + Template: tmpl, + Rand: rng, + FileCache: make(map[string]string), + NameSpaces: make(map[string]*NameSpace), } } +func (gen *Generator) getNamespace(namespace string) *NameSpace { + if _, ok := gen.NameSpaces[namespace]; !ok { + gen.NameSpaces[namespace] = &NameSpace{} + } + return gen.NameSpaces[namespace] +} + +// addNSValue adds a value to a namespace +func (gen *Generator) addNSValue(namespace, v string) { + gen.getNamespace(namespace).addValue(v) +} + +// adNSRef adds a reference to a namespace +func (gen *Generator) adNSRef(namespace string, r *reference) { + gen.getNamespace(namespace).addRef(r) +} + +func (gen *Generator) hasNSValues(namespace string) bool { + return len(gen.getNamespace(namespace).Values) > 0 +} + // Generate generates a document func (gen *Generator) Generate() (any, error) { - return gen.generateNode(gen.Template.Root, 25) + doc, err := gen.generateNode(gen.Template.Root, 25) + if err != nil { + return nil, err + } + + if err = gen.fixupReferences(); err != nil { + return nil, err + } + + return doc, nil } func (gen *Generator) generateNode(typename string, depth int) (any, error) { @@ -104,6 +165,10 @@ func (gen *Generator) generateNode(typename string, depth int) (any, error) { return gen.loremIpsum(node.MinLength, node.MaxLength, node.Unit), nil case *TmplBook: return gen.book(node.MinLength, node.MaxLength, node.Path) + case *TmplID: + return gen.generateID(node.Namespace), nil + case *TmplRef: + return gen.generateReference(node.Namespace) case *TmplNumber: return 
gen.randomNumber(node.Minimum, node.Maximum), nil case *TmplDateTime: @@ -389,3 +454,41 @@ func (gen *Generator) book(minlength, maxlength int, path string) (string, error trimmed = trimmed[:length] return string(trimmed), nil } + +func (gen *Generator) generateID(namespace string) string { + id := gen.randomString(1, 20) + gen.addNSValue(namespace, id) + return id +} + +func (gen *Generator) generateReference(namespace string) (any, error) { + if !gen.hasNSValues(namespace) { + return nil, fmt.Errorf( + "%w: no IDs in namespace %q", ErrBranchAbandoned, namespace, + ) + } + + ref := &reference{ + namespace: namespace, + value: "", + } + gen.adNSRef(namespace, ref) + return ref, nil +} + +func (gen *Generator) fixupReferences() error { + for name, ns := range gen.NameSpaces { + for _, ref := range ns.Refs { + if len(ns.Values) == 0 { + // this should never happen because references should + // only be generated if there are values available + return fmt.Errorf( + "no IDs when filling references in namespace %q", + name, + ) + } + ref.value = choose(gen.Rand, ns.Values) + } + } + return nil +} diff --git a/pkg/fakedoc/template.go b/pkg/fakedoc/template.go index 8b1bec5..9c47c5b 100644 --- a/pkg/fakedoc/template.go +++ b/pkg/fakedoc/template.go @@ -21,6 +21,11 @@ import ( const ( uriRegexp = `(https?)://(example\.(com|org|net)|[a-zA-Z][a-zA-Z0-9]{10}\.example(/[a-zA-Z0-9.-]{1,10}){3})` + + // constants for the 'synthetic' template type for the product ID + // generator + productIDTypeName = "fakedoc:product_id_generator" + productIDNamespace = "product_id" ) // Template describes the structure of the CSAF document to generate @@ -318,6 +323,52 @@ func (t *TmplBook) FromToml(md toml.MetaData, primType toml.Primitive) error { return nil } +// TmplID describes how to generate IDs that may be referenced from +// elsewhere in the document by TmplRef types using the same namespace. 
+type TmplID struct { + // Namespace is the namespace for the IDs + Namespace string `toml:"namespace"` +} + +// AsMap implements TmplNode +func (t *TmplID) AsMap() map[string]any { + return map[string]any{ + "type": "id", + "namespace": t.Namespace, + } +} + +// FromToml implements TmplNode +func (t *TmplID) FromToml(md toml.MetaData, primType toml.Primitive) error { + if err := md.PrimitiveDecode(primType, t); err != nil { + return err + } + return nil +} + +// TmplRef generate strings that are chosen from the IDs generated for +// the TmplID with the same namespace +type TmplRef struct { + // Namespace is the namespace for the IDs + Namespace string `toml:"namespace"` +} + +// AsMap implements TmplNode +func (t *TmplRef) AsMap() map[string]any { + return map[string]any{ + "type": "ref", + "namespace": t.Namespace, + } +} + +// FromToml implements TmplNode +func (t *TmplRef) FromToml(md toml.MetaData, primType toml.Primitive) error { + if err := md.PrimitiveDecode(primType, t); err != nil { + return err + } + return nil +} + // TmplNumber describes how to generate numbers type TmplNumber struct { // Minimum is the minum value of the generated numbers @@ -402,6 +453,11 @@ func FromSchema(schema *jsonschema.Schema) (*Template, error) { return nil, err } template.Root = root + + if err := template.applyCSAFSpecials(); err != nil { + return nil, err + } + return template, nil } @@ -554,6 +610,54 @@ func getSimpleType(types []string) (string, error) { return types[0], nil } +func (t *Template) applyCSAFSpecials() error { + t.Types[productIDTypeName] = &TmplID{ + Namespace: productIDNamespace, + } + + if err := t.overwritePropertyType( + "csaf:#/$defs/full_product_name_t", + "product_id", + productIDTypeName, + ); err != nil { + return err + } + + return t.overwriteType( + "csaf:#/$defs/product_id_t", + &TmplRef{ + Namespace: productIDNamespace, + }, + ) +} + +func (t *Template) overwriteType(typename string, tmpl TmplNode) error { + _, ok := t.Types[typename] + if !ok 
{ + return fmt.Errorf("type %s does not exist", typename) + } + t.Types[typename] = tmpl + return nil +} + +func (t *Template) overwritePropertyType(typename, propname, proptype string) error { + tmpl, ok := t.Types[typename] + if !ok { + return fmt.Errorf("type %s does not exist", typename) + } + product, ok := tmpl.(*TmplObject) + if !ok { + return fmt.Errorf("type %s is not a TmplObject", typename) + } + for _, prop := range product.Properties { + if prop.Name == propname { + prop.Type = proptype + return nil + } + } + return fmt.Errorf("type %s has no property %s", typename, propname) +} + // LoadTemplate loads a template from a TOML file. func LoadTemplate(file string) (*Template, error) { var template struct { @@ -608,6 +712,10 @@ func decodeType(md toml.MetaData, primType toml.Primitive) (TmplNode, error) { node = new(TmplLorem) case "book": node = new(TmplBook) + case "id": + node = new(TmplID) + case "ref": + node = new(TmplRef) case "number": node = new(TmplNumber) case "date-time": From c882192286fc10e5a368eac65cfb05ac62a2325c Mon Sep 17 00:00:00 2001 From: Bernhard Herzog Date: Tue, 10 Dec 2024 22:53:52 +0100 Subject: [PATCH 06/13] Add special case for generating arrays of unique references When generating group_ids we need to be able to generate arrays with at least two product_id references all of which have to be different. With the current approach that doesn't work because the randomArray method ends up instantiating TmplRef a bunch of times, but all the reference values generated are equal as they don't have an actual value yet, and because they're equal randomArray rejects all but the first one because of the uniqueness requirement and then the array is too short because it must contain at least two values. 
The solution chosen here is to add a special case to randomArray: if the items are TmplRef and there's a unique constraint we add a placeholder that represents an array of distinct references which are filled in at the end like the other references. This new kind of reference is represented by the same struct type as the other reference placeholder, but it now has an additional length field that is used to distinguish the cases (see the comments for the reference struct). --- pkg/fakedoc/generator.go | 60 +++++++++++++++++++++++++++++++--------- pkg/fakedoc/random.go | 12 ++++++++ 2 files changed, 59 insertions(+), 13 deletions(-) diff --git a/pkg/fakedoc/generator.go b/pkg/fakedoc/generator.go index d9a4679..0efaefa 100644 --- a/pkg/fakedoc/generator.go +++ b/pkg/fakedoc/generator.go @@ -76,15 +76,24 @@ func (ns *NameSpace) addRef(r *reference) { ns.Refs = append(ns.Refs, r) } -// reference is the value of a node created for TmplRef during -// generation. It's serialized to JSON as a JSON string. +// reference is the value of a node created for TmplRef or arrays of +// TmplRef during generation. In the former case it represents a single +// reference serialized to JSON as a JSON string. In the latter case +// it's a slice of references serialized as a JSON array of strings. +// The length field indicates which variant it is. 
type reference struct { namespace string - value string + // length is less than zero to indicate a single reference, greater + // or equal to zero to indicate an array + length int + values []string } func (ref *reference) MarshalJSON() ([]byte, error) { - return json.Marshal(ref.value) + if ref.length < 0 { + return json.Marshal(ref.values[0]) + } + return json.Marshal(ref.values) } // NewGenerator creates a new Generator based on a Template and an @@ -123,6 +132,10 @@ func (gen *Generator) hasNSValues(namespace string) bool { return len(gen.getNamespace(namespace).Values) > 0 } +func (gen *Generator) numNSValues(namespace string) int { + return len(gen.getNamespace(namespace).Values) +} + // Generate generates a document func (gen *Generator) Generate() (any, error) { doc, err := gen.generateNode(gen.Template.Root, 25) @@ -207,6 +220,19 @@ func (gen *Generator) randomArray(tmpl *TmplArray, depth int) (any, error) { maxitems = minitems + 2 } + if refnode, ok := gen.Template.Types[tmpl.Items].(*TmplRef); ok { + known := gen.numNSValues(refnode.Namespace) + if known >= minitems && tmpl.UniqueItems { + ref := &reference{ + namespace: refnode.Namespace, + length: minitems + gen.Rand.IntN(known-minitems+1), + values: nil, + } + gen.adNSRef(refnode.Namespace, ref) + return ref, nil + } + } + length := minitems + gen.Rand.IntN(maxitems-minitems+1) items := make([]any, 0, length) notInItems := func(v any) bool { @@ -470,7 +496,8 @@ func (gen *Generator) generateReference(namespace string) (any, error) { ref := &reference{ namespace: namespace, - value: "", + length: -1, + values: nil, } gen.adNSRef(namespace, ref) return ref, nil @@ -478,16 +505,23 @@ func (gen *Generator) generateReference(namespace string) (any, error) { func (gen *Generator) fixupReferences() error { for name, ns := range gen.NameSpaces { + if len(ns.Values) == 0 && len(ns.Refs) > 0 { + // this should never happen because references should + // only be generated if there are values available + return 
fmt.Errorf( + "no IDs when filling references in namespace %q", + name, + ) + } for _, ref := range ns.Refs { - if len(ns.Values) == 0 { - // this should never happen because references should - // only be generated if there are values available - return fmt.Errorf( - "no IDs when filling references in namespace %q", - name, - ) + switch { + case ref.length < 0: + ref.values = []string{choose(gen.Rand, ns.Values)} + case ref.length == 0: + ref.values = nil + default: + ref.values = chooseK(gen.Rand, ref.length, ns.Values) } - ref.value = choose(gen.Rand, ns.Values) } } return nil diff --git a/pkg/fakedoc/random.go b/pkg/fakedoc/random.go index a703119..49bb83f 100644 --- a/pkg/fakedoc/random.go +++ b/pkg/fakedoc/random.go @@ -21,6 +21,18 @@ func choose[T any](rand *rand.Rand, choices []T) T { return choices[rand.IntN(len(choices))] } +// chooseK returns a slice of K different randomly chosen elements of +// choices. Assumes that choices has at least K elements and that all +// elements are different. +func chooseK[T any](rand *rand.Rand, k int, choices []T) []T { + perm := rand.Perm(len(choices)) + chosen := make([]T, k) + for i := range k { + chosen[i] = choices[perm[i]] + } + return chosen +} + // shuffle randomly shuffles a slice func shuffle[T any](rand *rand.Rand, ts []T) []T { rand.Shuffle(len(ts), func(i, j int) { From 6a957d1879837e2b4e61c58cb0c4496dab33ee3a Mon Sep 17 00:00:00 2001 From: Bernhard Herzog Date: Tue, 10 Dec 2024 23:15:25 +0100 Subject: [PATCH 07/13] Add dependency relation between properties of the same object When generating product groups, we need to generate the product_ids property before the group_id property. Group IDs are defined by the product_groups items. If generating the product_ids for a group fails because e.g. there are no product_ids the entire product group fails and will not be in the document. 
If we had already generated a group_id for the group the generator would have a known group ID and therefore happily generate references to it elsewhere in the document, but the document would not actually define it leading to dangling references. The dependency between properties allows us to indicate that group_id depends on product_ids and therefore the latter must be attempted first. --- pkg/fakedoc/generator.go | 70 ++++++++++++++++++++++++++++++++++++---- pkg/fakedoc/template.go | 1 + 2 files changed, 65 insertions(+), 6 deletions(-) diff --git a/pkg/fakedoc/generator.go b/pkg/fakedoc/generator.go index 0efaefa..cc5e961 100644 --- a/pkg/fakedoc/generator.go +++ b/pkg/fakedoc/generator.go @@ -317,22 +317,73 @@ func (gen *Generator) randomOneOf(oneof []string, depth int) (any, error) { return nil, fmt.Errorf("could not generate any of %v", oneof) } +func orderByDependency(props []*Property) ([]*Property, error) { + pm := make(map[string]*Property) + seen := make(map[string]bool) + var hasdeps []*Property + var sorted []*Property + for _, prop := range props { + pm[prop.Name] = prop + switch prop.Depends { + case "": + sorted = append(sorted, prop) + seen[prop.Name] = true + default: + hasdeps = append(hasdeps, prop) + } + } + +outer: + for len(hasdeps) != 0 { + for i, prop := range hasdeps { + dep, ok := pm[prop.Depends] + if !ok { + return nil, fmt.Errorf( + "property %s depends on unknown propety %s", + prop.Name, + prop.Depends, + ) + } + if !seen[dep.Name] { + continue + } + sorted = append(sorted, prop) + seen[prop.Name] = true + hasdeps = slices.Delete(hasdeps, i, i+1) + continue outer + } + return nil, fmt.Errorf("could not determine dependency order") + } + + return sorted, nil +} + func (gen *Generator) generateObject(node *TmplObject, depth int) (any, error) { - properties := make(map[string]any) optional := make([]*Property, 0, len(node.Properties)) + required := make([]*Property, 0, len(node.Properties)) for _, prop := range node.Properties { switch { 
case prop.Required: - value, err := gen.generateNode(prop.Type, depth-1) - if err != nil { - return nil, err - } - properties[prop.Name] = value + required = append(required, prop) default: optional = append(optional, prop) } } + required, err := orderByDependency(required) + if err != nil { + return nil, err + } + + properties := make(map[string]any) + for _, prop := range required { + value, err := gen.generateNode(prop.Type, depth-1) + if err != nil { + return nil, err + } + properties[prop.Name] = value + } + // Choose a value for extraProps, the number of optional properties // to add based on how many we need at least, node.MinProperties, // and how many we may have at most, node.MaxProperties. Both of @@ -365,6 +416,13 @@ func (gen *Generator) generateObject(node *TmplObject, depth int) (any, error) { i := gen.Rand.IntN(len(optional)) prop := optional[i] optional = slices.Delete(optional, i, i+1) + if prop.Depends != "" && properties[prop.Depends] == nil { + // prop dependency hasn't been generated yet, try something + // else first. Note: this test branch happens after the + // property has been removed from optional to avoid infinite + // loops. + continue + } value, err := gen.generateNode(prop.Type, depth-1) switch { case errors.Is(err, ErrBranchAbandoned): diff --git a/pkg/fakedoc/template.go b/pkg/fakedoc/template.go index 9c47c5b..e966e4e 100644 --- a/pkg/fakedoc/template.go +++ b/pkg/fakedoc/template.go @@ -72,6 +72,7 @@ type Property struct { Name string `toml:"name"` Type string `toml:"type"` Required bool `toml:"required"` + Depends string `toml:"depends"` } // TmplObject describes a JSON object From 51be626aef024de95561ec026635cd656186265b Mon Sep 17 00:00:00 2001 From: Bernhard Herzog Date: Wed, 11 Dec 2024 20:24:58 +0100 Subject: [PATCH 08/13] Add special handling for group_ids The group IDs are now handled basically in the same way as the product IDs. 
The built-in template is automatically modified with group ID specific settings that mostly work like the ones for the product IDs, with one exception, the group IDs use the dependency mechanism between properties to make sure that in product_group objects the group ID is only generated when there are product IDs available. --- pkg/fakedoc/template.go | 58 ++++++++++++++++++++++++++++++++--------- 1 file changed, 45 insertions(+), 13 deletions(-) diff --git a/pkg/fakedoc/template.go b/pkg/fakedoc/template.go index e966e4e..167b603 100644 --- a/pkg/fakedoc/template.go +++ b/pkg/fakedoc/template.go @@ -10,6 +10,7 @@ package fakedoc import ( "cmp" + "errors" "fmt" "io" "slices" @@ -26,6 +27,8 @@ const ( // generator productIDTypeName = "fakedoc:product_id_generator" productIDNamespace = "product_id" + groupIDTypeName = "fakedoc:group_id_generator" + groupIDNamespace = "group_id" ) // Template describes the structure of the CSAF document to generate @@ -615,21 +618,48 @@ func (t *Template) applyCSAFSpecials() error { t.Types[productIDTypeName] = &TmplID{ Namespace: productIDNamespace, } + t.Types[groupIDTypeName] = &TmplID{ + Namespace: groupIDNamespace, + } + + var errs []error + collectErr := func(err error) { + errs = append(errs, err) + } - if err := t.overwritePropertyType( + collectErr(t.modifyProperty( "csaf:#/$defs/full_product_name_t", "product_id", - productIDTypeName, - ); err != nil { - return err - } + func(p *Property) error { + p.Type = productIDTypeName + return nil + }, + )) + + collectErr(t.modifyProperty( + "csaf:#/properties/product_tree/properties/product_groups/items", + "group_id", + func(p *Property) error { + p.Type = groupIDTypeName + p.Depends = "product_ids" + return nil + }, + )) - return t.overwriteType( + collectErr(t.overwriteType( "csaf:#/$defs/product_id_t", &TmplRef{ Namespace: productIDNamespace, }, - ) + )) + collectErr(t.overwriteType( + "csaf:#/$defs/product_group_id_t", + &TmplRef{ + Namespace: groupIDNamespace, + }, + )) + + 
return errors.Join(errs...) } func (t *Template) overwriteType(typename string, tmpl TmplNode) error { @@ -641,19 +671,21 @@ func (t *Template) overwriteType(typename string, tmpl TmplNode) error { return nil } -func (t *Template) overwritePropertyType(typename, propname, proptype string) error { +func (t *Template) modifyProperty( + typename, propname string, + modify func(*Property) error, +) error { tmpl, ok := t.Types[typename] if !ok { return fmt.Errorf("type %s does not exist", typename) } - product, ok := tmpl.(*TmplObject) + obj, ok := tmpl.(*TmplObject) if !ok { return fmt.Errorf("type %s is not a TmplObject", typename) } - for _, prop := range product.Properties { - if prop.Name == propname { - prop.Type = proptype - return nil + for _, p := range obj.Properties { + if p.Name == propname { + return modify(p) } } return fmt.Errorf("type %s has no property %s", typename, propname) From 3b2b0503e1998e6bbc186157aae2ebcca8ae9a8c Mon Sep 17 00:00:00 2001 From: Bernhard Herzog Date: Wed, 11 Dec 2024 20:59:50 +0100 Subject: [PATCH 09/13] Omit optional fields with default values from properties in TOML The required and depends fields of properties can be omitted if they have default values. Since they have the defaults for many properties, they're omitted from the TOML serialization of the built-in template. 
--- pkg/fakedoc/template.go | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/pkg/fakedoc/template.go b/pkg/fakedoc/template.go index 167b603..eda2d37 100644 --- a/pkg/fakedoc/template.go +++ b/pkg/fakedoc/template.go @@ -94,9 +94,24 @@ type TmplObject struct { // AsMap implements TmplNode func (t *TmplObject) AsMap() map[string]any { + var props []map[string]any + for _, p := range t.Properties { + m := map[string]any{ + "name": p.Name, + "type": p.Type, + } + if p.Required { + m["required"] = p.Required + } + if p.Depends != "" { + m["depends"] = p.Depends + } + props = append(props, m) + } + m := map[string]any{ "type": "object", - "properties": t.Properties, + "properties": props, } if t.MinProperties != -1 { m["minproperties"] = t.MinProperties From 6b3947f0e6c367f7ded7138c2485596269e0e48b Mon Sep 17 00:00:00 2001 From: Bernhard Herzog Date: Thu, 12 Dec 2024 20:06:46 +0100 Subject: [PATCH 10/13] Implement a better way to deal with group_ids MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The depends attribute for properties is a work around for the problem that fakedoc could generate references to non existing group IDs, which happened because once an ID was generated it couldn't be removed from the name space even if the branch for which it was generated had been abandoned during generation for e.g. lack of product IDs. See 6a957d1 for more details. As it turns out, there's an easy way of removing IDs generated in abandoned branches¹: We take a snapshot of the generator's name space state before attempting to create a branch (basically the entirety of generateNode), attempt to create the branch, and if it fails because of an error based on ErrBranchAbandoned we restore the generator's namespace state to the snapshot. 
The key insight for why this works is that if during the attempt to generate the branch, any group IDs have been created then any references that might have been created in that branch because of the existence of those IDs are also in that branch. So restoring the snapshot removes all the consequences of ID generation that happened in the attempt. So, forcing the order in which some properties are generated is not needed any more. It was a workaround that only worked in highly specific circumstances (it was introduced specifically for product groups) and is hard to explain to users who want to modify the templates because it requires somewhat detailed knowledge about how fakedoc works. The new approach works better and doesn't need to be understood by users for IDs to work. ¹ branch in the generic tree sense, not necessarily CSAF branches_t --- pkg/fakedoc/generator.go | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/pkg/fakedoc/generator.go b/pkg/fakedoc/generator.go index cc5e961..6c09af0 100644 --- a/pkg/fakedoc/generator.go +++ b/pkg/fakedoc/generator.go @@ -76,6 +76,13 @@ func (ns *NameSpace) addRef(r *reference) { ns.Refs = append(ns.Refs, r) } +func (ns *NameSpace) snapshot() *NameSpace { + return &NameSpace{ + Values: ns.Values, + Refs: ns.Refs, + } +} + // reference is the value of a node created for TmplRef or arrays of // TmplRef during generation. In the former case it represents a single // reference serialized to JSON as a JSON string.
In the latter case @@ -136,6 +143,18 @@ func (gen *Generator) numNSValues(namespace string) int { return len(gen.getNamespace(namespace).Values) } +func (gen *Generator) snapshotNamespaces() map[string]*NameSpace { + copy := make(map[string]*NameSpace, len(gen.NameSpaces)) + for name, ns := range gen.NameSpaces { + copy[name] = ns.snapshot() + } + return copy +} + +func (gen *Generator) restoreSnapshot(snapshot map[string]*NameSpace) { + gen.NameSpaces = snapshot +} + // Generate generates a document func (gen *Generator) Generate() (any, error) { doc, err := gen.generateNode(gen.Template.Root, 25) @@ -150,7 +169,7 @@ func (gen *Generator) Generate() (any, error) { return doc, nil } -func (gen *Generator) generateNode(typename string, depth int) (any, error) { +func (gen *Generator) generateNode(typename string, depth int) (_ any, err error) { if depth <= 0 { return nil, ErrDepthExceeded } @@ -159,6 +178,17 @@ func (gen *Generator) generateNode(typename string, depth int) (any, error) { if !ok { return nil, fmt.Errorf("unknown type '%s'", typename) } + + // make sure IDs generated in abandoned branches are discarded so + // that we don't end up with e.g. references to group IDs that are + // not actually there. 
+ snapshot := gen.snapshotNamespaces() + defer func() { + if errors.Is(err, ErrBranchAbandoned) { + gen.restoreSnapshot(snapshot) + } + }() + switch node := nodeTmpl.(type) { case *TmplObject: return gen.generateObject(node, depth) From 192e6bd0ab7722daab822007d49cb215184d485d Mon Sep 17 00:00:00 2001 From: Bernhard Herzog Date: Thu, 12 Dec 2024 20:41:40 +0100 Subject: [PATCH 11/13] Remove Property's Depends field and related code This was only introduced to work around problems with group IDs which have been solved in a different way with 6b3947f --- pkg/fakedoc/generator.go | 56 +--------------------------------------- pkg/fakedoc/template.go | 5 ---- 2 files changed, 1 insertion(+), 60 deletions(-) diff --git a/pkg/fakedoc/generator.go b/pkg/fakedoc/generator.go index 6c09af0..25a1ac4 100644 --- a/pkg/fakedoc/generator.go +++ b/pkg/fakedoc/generator.go @@ -347,50 +347,8 @@ func (gen *Generator) randomOneOf(oneof []string, depth int) (any, error) { return nil, fmt.Errorf("could not generate any of %v", oneof) } -func orderByDependency(props []*Property) ([]*Property, error) { - pm := make(map[string]*Property) - seen := make(map[string]bool) - var hasdeps []*Property - var sorted []*Property - for _, prop := range props { - pm[prop.Name] = prop - switch prop.Depends { - case "": - sorted = append(sorted, prop) - seen[prop.Name] = true - default: - hasdeps = append(hasdeps, prop) - } - } - -outer: - for len(hasdeps) != 0 { - for i, prop := range hasdeps { - dep, ok := pm[prop.Depends] - if !ok { - return nil, fmt.Errorf( - "property %s depends on unknown propety %s", - prop.Name, - prop.Depends, - ) - } - if !seen[dep.Name] { - continue - } - sorted = append(sorted, prop) - seen[prop.Name] = true - hasdeps = slices.Delete(hasdeps, i, i+1) - continue outer - } - return nil, fmt.Errorf("could not determine dependency order") - } - - return sorted, nil -} - func (gen *Generator) generateObject(node *TmplObject, depth int) (any, error) { - optional := make([]*Property,
0, len(node.Properties)) - required := make([]*Property, 0, len(node.Properties)) + var optional, required []*Property for _, prop := range node.Properties { switch { case prop.Required: @@ -400,11 +358,6 @@ func (gen *Generator) generateObject(node *TmplObject, depth int) (any, error) { } } - required, err := orderByDependency(required) - if err != nil { - return nil, err - } - properties := make(map[string]any) for _, prop := range required { value, err := gen.generateNode(prop.Type, depth-1) @@ -446,13 +399,6 @@ func (gen *Generator) generateObject(node *TmplObject, depth int) (any, error) { i := gen.Rand.IntN(len(optional)) prop := optional[i] optional = slices.Delete(optional, i, i+1) - if prop.Depends != "" && properties[prop.Depends] == nil { - // prop dependency hasn't been generated yet, try something - // else first. Note: this test branch happens after the - // property has been removed from optional to avoid infinite - // loops. - continue - } value, err := gen.generateNode(prop.Type, depth-1) switch { case errors.Is(err, ErrBranchAbandoned): diff --git a/pkg/fakedoc/template.go b/pkg/fakedoc/template.go index eda2d37..7f781f3 100644 --- a/pkg/fakedoc/template.go +++ b/pkg/fakedoc/template.go @@ -75,7 +75,6 @@ type Property struct { Name string `toml:"name"` Type string `toml:"type"` Required bool `toml:"required"` - Depends string `toml:"depends"` } // TmplObject describes a JSON object @@ -103,9 +102,6 @@ func (t *TmplObject) AsMap() map[string]any { if p.Required { m["required"] = p.Required } - if p.Depends != "" { - m["depends"] = p.Depends - } props = append(props, m) } @@ -656,7 +652,6 @@ func (t *Template) applyCSAFSpecials() error { "group_id", func(p *Property) error { p.Type = groupIDTypeName - p.Depends = "product_ids" return nil }, )) From 35506b5ef78fdc4c89fe6929783a21a113e92d1d Mon Sep 17 00:00:00 2001 From: Bernhard Herzog Date: Thu, 12 Dec 2024 21:10:53 +0100 Subject: [PATCH 12/13] Add documentation for id and ref --- docs/templates.md 
| 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/docs/templates.md b/docs/templates.md index a897cd0..45ff000 100644 --- a/docs/templates.md +++ b/docs/templates.md @@ -293,3 +293,31 @@ require custom text. type = "book" path = "moby-dick.txt" ``` + +#### `id` and `ref` + +Together, these are used to generate IDs in the document, such as +product IDs and group IDs, that are defined in the document in one place +and referenced in other places in the document. To support multiple +kinds of IDs, there can be any number of namespaces for IDs. Both `id` +and `ref` have a `namespace` attribute that indicates which namespace to +use. Which namespaces exist is implicitly defined by which namespaces +are mentioned in the `id` templates. + + +##### Attributes + +* `namespace`: String with the name of the namespace of the IDs. + + +##### Example + +``` toml + [types."fakedoc:product_id_generator"] + namespace = "product_id" + type = "id" + + [types."csaf:#/$defs/product_id_t"] + namespace = "product_id" + type = "ref" +``` From 49635411c02e5484ea4aab7c4640c132707df3ff Mon Sep 17 00:00:00 2001 From: Bernhard Herzog Date: Fri, 13 Dec 2024 16:49:09 +0100 Subject: [PATCH 13/13] Rename local variable to avoid shadowing a built-in --- pkg/fakedoc/generator.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/fakedoc/generator.go b/pkg/fakedoc/generator.go index 25a1ac4..5d8e8b0 100644 --- a/pkg/fakedoc/generator.go +++ b/pkg/fakedoc/generator.go @@ -144,11 +144,11 @@ func (gen *Generator) numNSValues(namespace string) int { } func (gen *Generator) snapshotNamespaces() map[string]*NameSpace { - copy := make(map[string]*NameSpace, len(gen.NameSpaces)) + snap := make(map[string]*NameSpace, len(gen.NameSpaces)) for name, ns := range gen.NameSpaces { - copy[name] = ns.snapshot() + snap[name] = ns.snapshot() } - return copy + return snap } func (gen *Generator) restoreSnapshot(snapshot map[string]*NameSpace) {