Skip to content

Commit

Permalink
review baseline reitrieving summary/baseline from service/API
Browse files Browse the repository at this point in the history
  • Loading branch information
mtulio committed Aug 1, 2024
1 parent 48e1612 commit 0a7c3fc
Show file tree
Hide file tree
Showing 11 changed files with 273 additions and 156 deletions.
90 changes: 90 additions & 0 deletions docs/opct/adm/baseline.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
# opct adm baseline [actions]

Manage baseline artifacts on OPCT backend.

> Note: This is an administrative task. If you are not managing OPCT backends, skip this document.

OPCT baseline artifacts are conformance executions that have been accepted to
be used as reference results during the review process.

The baseline artifacts are automated executions that are automatically published to
the OPCT services.

The `report` command automatically consumes the latest valid result for a specific
`OpenShift Version` and `Platform Type` in the filter pipeline (`Failed Filter API`),
inferring the common failures in that specific release, which **may** not be directly
related to the environment that is being validated.

To begin with, explore the Usage section.

## Usage

Commands:
- `opct adm baseline list`: List baselines available.
- `opct adm baseline get`: Get a specific baseline summary.
- `opct adm baseline publish`: (restricted) Publish artifacts to the OPCT services.
- `opct adm baseline indexer`: (restricted) Re-index the report service to serve the baseline summary.

## Examples

- List the latest summary's artifacts by version and platform type:

```bash
$ opct adm baseline list
+---------------+--------+-------------------+--------------+------------------------------+
| ID | TYPE | OPENSHIFT VERSION | PLATFORMTYPE | NAME |
+---------------+--------+-------------------+--------------+------------------------------+
| 4.15_External | latest | 4.15 | External | 4.15_External_20240228043414 |
| 4.15_None | latest | 4.15 | None | 4.15_None_20240228041900 |
+---------------+--------+-------------------+--------------+------------------------------+
```

- List all available summary artifacts:

```bash
$ opct adm baseline list --all
+--------+---------+----------+----------+------------------------------+
| LATEST | VERSION | PLATFORM | PROVIDER | NAME |
+--------+---------+----------+----------+------------------------------+
| | 4.15 | External | vsphere | 4.15_External_20240110044423 |
| | 4.15 | External | vsphere | 4.15_External_20240221044618 |
| * | 4.15 | External | vsphere | 4.15_External_20240228043414 |
| | 4.15 | None | None | 4.15_None_20240221041256 |
| * | 4.15 | None | None | 4.15_None_20240228041900 |
+--------+---------+----------+----------+------------------------------+
```

- Review the summary of the latest artifact for a specific release:

```bash
$ opct adm baseline get --platform=External --version=4.15 -o /tmp/baseline-summary.json
```

- Publish many artifacts to the OPCT services (**administrative only**):

```bash
export PROCESS_FILES="4.15.0-rc.7-20240221-HighlyAvailable-vsphere-None.tar.gz
4.15.0-rc.7-20240221-HighlyAvailable-vsphere-External.tar.gz
4.15.0-rc.1-20240110-HighlyAvailable-vsphere-External.tar.gz
4.15.0-20240228-HighlyAvailable-vsphere-None.tar.gz
4.15.0-20240228-HighlyAvailable-vsphere-External.tar.gz"

# Upload each baseline artifact
for PF in $PROCESS_FILES;
do
opct adm baseline publish --log-level=debug "$HOME/opct/s3-bucket-results/v0.4.0/default/$PF";
done

# re-index
opct adm baseline indexer

# Invalidate the CloudFront cache if you received an error:
# - AWS Console: AWS CloudFront > Distributions > Select Distribution > Invalidations > Create a new invalidation for '/*'
# - AWS CLI: $ aws cloudfront create-invalidation --distribution-id <id> --paths "/*"

# Check all baseline data
opct-devel adm baseline list --all

# Check the latest baseline data
opct-devel adm baseline list
```
26 changes: 15 additions & 11 deletions internal/opct/summary/consolidated.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"os"
"regexp"
"sort"
"strings"

// "github.com/hashicorp/go-retryablehttp"
log "github.com/sirupsen/logrus"
Expand Down Expand Up @@ -373,7 +374,7 @@ func (cs *ConsolidatedSummary) applyFilterBaselineAPI() error {
// loadBaselineFromAPI queries the OPCT "backend" looking for the baseline results.
func (cs *ConsolidatedSummary) loadBaselineFromAPI() error {
if os.Getenv("OPCT_DISABLE_FILTER_BASELINE") == "1" {
log.Warnf("Unable to load baseline from API, filter Baseline is explicited disabled by OPCT_DISABLE_FILTER_BASELINE")
log.Warnf("Filter pipeline: Basline API is explicity disabled by OPCT_DISABLE_FILTER_BASELINE, skipping the discoverying baseline results from API")
return nil
}
// Path to S3 Object /api/v0/result/summary/{ocpVersion}/{platformType}
Expand All @@ -385,18 +386,18 @@ func (cs *ConsolidatedSummary) loadBaselineFromAPI() error {
// baseURL := "https://d23912a6309zf7.cloudfront.net/api/v0"

// Result to evaluate before returning failure
ocpVersion, err := cs.Provider.OpenShift.GetClusterVersionXY()
ocpRelease, err := cs.Provider.OpenShift.GetClusterVersionXY()
if err != nil {
os, err := cs.Provider.OpenShift.GetClusterVersion()
if err != nil {
return errors.Errorf("Error getting cluster version: %v", err)
}
ocpVersion = os.Desired
ocpRelease = fmt.Sprintf("%s.%s", strings.Split(os.Desired, ".")[0], strings.Split(os.Desired, ".")[1])
}
platformType := cs.Provider.OpenShift.GetInfrastructurePlatformType()

cs.BaselineAPI = baseline.NewBaselineReportSummary()
if err := cs.BaselineAPI.GetLatestSummaryByPlatform(ocpVersion, platformType); err != nil {
if err := cs.BaselineAPI.GetLatestRawSummaryFromPlatformWithFallback(ocpRelease, platformType); err != nil {
return errors.Wrap(err, "failed to get baseline from API")
}

Expand Down Expand Up @@ -441,15 +442,10 @@ func (cs *ConsolidatedSummary) applyFilterBaselineAPIForPlugin(pluginName string
if b != nil {
e2eFailuresBaseline, err = b.GetPriorityFailuresFromPlugin(pluginName)
if err != nil {
log.Errorf("failed to get priority failures from plugin: %w", err)
log.Errorf("failed to get priority failures from plugin: %v", err)
}
}

if skipFilter {
log.Warn("Filter Baseline was explicited disabled by OPCT_DISABLE_FILTER_BASELINE")
providerSummary.FailedFilter4 = providerSummary.FailedFilter3
}

e2eFailuresPipeline := providerSummary.FailedFilter3
hashBaseline := make(map[string]struct{}, len(e2eFailuresPipeline))

Expand All @@ -465,10 +461,18 @@ func (cs *ConsolidatedSummary) applyFilterBaselineAPIForPlugin(pluginName string
}
providerSummary.FailedExcludedFilter4 = append(providerSummary.FailedExcludedFilter4, v)
}

// feed the pipeline with the same tests when the filter is disabled.
if skipFilter {
log.Warn("Filter pipeline: Basline API is explicity disabled by OPCT_DISABLE_FILTER_BASELINE, using Filter3 to keep processing failures")
providerSummary.FailedFilter4 = providerSummary.FailedFilter3
}
sort.Strings(providerSummary.FailedFilter4)

log.Debugf("Debug filter BaselineAPI on pipeline for plugin %s: e2eFailuresBaseline(%d) e2eFailuresPipeline(%d) FailedFilter4(%d) FailedExcludedFilter4(%d)\n",
pluginName, len(e2eFailuresBaseline), len(e2eFailuresPipeline),
len(providerSummary.FailedFilter4), len(providerSummary.FailedExcludedFilter4))
sort.Strings(providerSummary.FailedFilter4)

return nil
}

Expand Down
103 changes: 62 additions & 41 deletions internal/report/baseline/baseline.go
Original file line number Diff line number Diff line change
@@ -1,3 +1,14 @@
// Package baseline holds the baseline report summary data and the functions to
// interact with the results service, backed by CloudFront and S3 storage bucket,
// serving summarized results from CI.
// "Baseline" results are valid/accepted CI executions. The results are processed
// and consumed by OPCT CLI 'report' command to compare the results of the validation
// tests. Those are CI results from reference installations which are used to compare
// the results from custom executions, aiming to infer persistent failures and
// helping to isolate:
// - Flaky tests
// - Permanent failures
// - Test environment issues
package baseline

import (
Expand All @@ -14,7 +25,7 @@ import (
)

const (
bucketNameBaselineReportSummary = "opct-results"
bucketNameBaselineReportSummary = "opct-archive"
indexObjectKey = "api/v0/result/summary/index.json"
objectPathBaselineReportSummaryPath = "/result/summary/index.json"

Expand All @@ -24,11 +35,12 @@ const (
// The original bucket[1], must be migrated to another account and the CloudFront URL,
// is part of that goal without disrupting the current process.
// [1] "https://openshift-provider-certification.s3.us-west-2.amazonaws.com"
reportBaseURL = "https://d23912a6309zf7.cloudfront.net/api/v0"
reportBaseURL = "https://d23912a6309zf7.cloudfront.net"
cloudfrontDistributionID = "E3MJR7MT6EHHJC"

opctStorageBucketName = "openshift-provider-certification"
opctStorageBucketRegion = "us-west-2"
// To override those values use environment variables OPCT_EXP_BUCKET_NAME and OPCT_EXP_BUCKET_REGION
opctStorageBucketName = "opct-archive"
opctStorageBucketRegion = "us-east-1"
)

// BaselineReport is the struct that holds the baseline report data
Expand All @@ -45,14 +57,31 @@ type BaselineConfig struct {
buffer *BaselineData
}

// NewBaselineReportSummary creates a new BaselineConfig struct with the default
// configuration, allowing customization to change the S3 storage used in the management
// tasks.
// TODO deprecate the environment variables when backend is fully migrated to dedicated
// AWS account.
func NewBaselineReportSummary() *BaselineConfig {
bucketName := opctStorageBucketName
bucketRegion := opctStorageBucketRegion
if os.Getenv("OPCT_EXP_BUCKET_NAME") != "" {
log.Warnf("NewBaselineReportSummary() Using custom bucket name: %s", os.Getenv("OPCT_EXP_BUCKET_NAME"))
bucketName = os.Getenv("OPCT_EXP_BUCKET_NAME")
}
if os.Getenv("OPCT_EXP_BUCKET_REGION") != "" {
log.Warnf("NewBaselineReportSummary() Using custom bucket region: %s", os.Getenv("OPCT_EXP_BUCKET_REGION"))
bucketRegion = os.Getenv("OPCT_EXP_BUCKET_REGION")
}
return &BaselineConfig{
bucketName: opctStorageBucketName,
bucketRegion: opctStorageBucketRegion,
bucketName: bucketName,
bucketRegion: bucketRegion,
cloudfrontDistributionID: cloudfrontDistributionID,
}
}

// createS3Clients creates the S3 client and uploader to interact with the S3 storage, checking if
// bucket exists.
func (brs *BaselineConfig) createS3Clients() (*s3.S3, *s3manager.Uploader, error) {
if !brs.checkRequiredParams() {
return nil, nil, fmt.Errorf("missing required parameters or dependencies to enable this feature. Please wait for stable release to use it")
Expand All @@ -77,6 +106,7 @@ func (brs *BaselineConfig) createS3Clients() (*s3.S3, *s3manager.Uploader, error
return svcS3, uploader, nil
}

// ReadReportSummaryIndexFromAPI reads the summary report index from the OPCT report URL.
func (brs *BaselineConfig) ReadReportSummaryIndexFromAPI() (*baselineIndex, error) {
resp, err := brs.ReadReportSummaryFromAPI(objectPathBaselineReportSummaryPath)
if err != nil {
Expand Down Expand Up @@ -117,56 +147,55 @@ func (brs *BaselineConfig) ReadReportSummaryFromAPI(path string) ([]byte, error)
}
defer resp.Body.Close()

if resp.StatusCode < 200 && resp.StatusCode >= 300 {

// fmt.Println("response Body:", string(baselineBody))
// break
log.Debug("Summary Report API response code: ", resp.Status)
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
return nil, fmt.Errorf("error baseline API request: %s", resp.Status)
}
// fmt.Println("URL:>", url)
// fmt.Println("response Status:", resp.Status)
// fmt.Println("response Headers:", resp.Header)
rawResp, _ := io.ReadAll(resp.Body)

// brs.loadedBaseline = make(map[string]interface{})
// err = json.Unmarshal(dataBody, &brs.loadedBaseline)
// if err != nil {
// return err
// }
rawResp, err := io.ReadAll(resp.Body)
if err != nil {
log.WithError(err).Error("error reading response body")
return nil, err
}

return rawResp, nil
}

func (brs *BaselineConfig) GetLatestRawSummaryFromPlatformWithFallback(ocpVersion, platformType string) (body []byte, err error) {
// GetLatestRawSummaryFromPlatformWithFallback reads the latest summary report from the OPCT report
// service, trying to get the latest summary from the specified platform, and fallback to "None",
// and "AWS", when available.
func (brs *BaselineConfig) GetLatestRawSummaryFromPlatformWithFallback(ocpRelease, platformType string) error {
errCount := 0
evaluatePaths := []string{
fmt.Sprintf("/result/summary/%s_%s_latest.json", ocpVersion, platformType),
fmt.Sprintf("/result/summary/%s_%s_latest.json", ocpVersion, "None"),
fmt.Sprintf("/result/summary/%s_%s_latest.json", ocpVersion, "AWS"),
fmt.Sprintf("/result/summary/%s_%s_latest.json", ocpRelease, platformType),
fmt.Sprintf("/result/summary/%s_%s_latest.json", ocpRelease, "None"),
fmt.Sprintf("/result/summary/%s_%s_latest.json", ocpRelease, "AWS"),
}
// body := []byte{}
// var err error
for _, path := range evaluatePaths {
// do not tolerate more than 10 errors
// do not tolerate many errors
if errCount > (len(evaluatePaths) * 2) {
log.Errorf("Too many errors, stopping the process")
break
}
body, err = brs.ReadReportSummaryFromAPI(path)
body, err := brs.ReadReportSummaryFromAPI(path)
if err != nil {
log.WithError(err).Error("error reading baseline report summary from API")
errCount++
continue
}
return body, err
brs.buffer = &BaselineData{}
brs.buffer.SetRawData(body)
return nil
}
return body, nil
return nil
}

// TODO get summary, marshall and return the struct.
// func (brs *BaselineConfig) GetSummaryFromPlatform(ocpVersion, platformType string) (*BaselineReport, error) {
func (brs *BaselineConfig) GetLatestSummaryByPlatform(ocpVersion, platformType string) error {
buf, err := brs.GetLatestRawSummaryFromPlatformWithFallback(ocpVersion, platformType)
// GetLatestSummaryByPlatform reads the latest summary report from the OPCT report service, trying to
// retrieve from release and platform.
// ocpRelease is the OpenShift release in X.Y (major.minor) form, like "4.7", "4.8", etc.
func (brs *BaselineConfig) GetLatestSummaryByPlatform(ocpRelease, platformType string) error {
path := fmt.Sprintf("/result/summary/%s_%s_latest.json", ocpRelease, platformType)
buf, err := brs.ReadReportSummaryFromAPI(path)
if err != nil {
return fmt.Errorf("unable to get latest summary by platform: %w", err)
}
Expand Down Expand Up @@ -195,14 +224,6 @@ func (brs *BaselineConfig) checkRequiredParams() bool {
if os.Getenv("OPCT_ENABLE_ADM_BASELINE") == "" {

Check failure on line 224 in internal/report/baseline/baseline.go

View workflow job for this annotation

GitHub Actions / go-lint

S1008: should use 'return os.Getenv("OPCT_ENABLE_ADM_BASELINE") != ""' instead of 'if os.Getenv("OPCT_ENABLE_ADM_BASELINE") == "" { return false }; return true' (gosimple)

Check failure on line 224 in internal/report/baseline/baseline.go

View workflow job for this annotation

GitHub Actions / go-staticcheck

should use 'return os.Getenv("OPCT_ENABLE_ADM_BASELINE") != ""' instead of 'if os.Getenv("OPCT_ENABLE_ADM_BASELINE") == "" { return false }; return true' (S1008)
return false
}
if os.Getenv("OPCT_EXP_BUCKET_NAME") != "" {
log.Infof("Using custom bucket name: %s", os.Getenv("OPCT_EXP_BUCKET_NAME"))
brs.bucketName = os.Getenv("OPCT_EXP_BUCKET_NAME")
}
if os.Getenv("OPCT_EXP_BUCKET_REGION") != "" {
log.Infof("Using custom bucket region: %s", os.Getenv("OPCT_EXP_BUCKET_REGION"))
brs.bucketRegion = os.Getenv("OPCT_EXP_BUCKET_REGION")
}
return true
}

Expand Down
Loading

0 comments on commit 0a7c3fc

Please sign in to comment.