Skip to main content

Documentation Index

Fetch the complete documentation index at: https://sourcebot-msukkarieh-ado.mintlify.app/llms.txt

Use this file to discover all available pages before exploring further.

Sourcebot can sync code from GitHub.com, GitHub Enterprise Server, and GitHub Enterprise Cloud. If you’re not familiar with Sourcebot connections, please read the connections overview first.

Examples

{
    "type": "github",
    "repos": [
        "sourcebot-dev/sourcebot",
        "getsentry/sentry",
        "torvalds/linux"
    ]
}
{
    "type": "github",
    "orgs": [
        "sourcebot-dev",
        "getsentry",
        "vercel"
    ]
}
{
    "type": "github",
    "users": [
        "torvalds",
        "ggerganov"
    ]
}
{
    "type": "github",
    // Sync all repos in `my-org` that have a topic that...
    "orgs": [
        "my-org"
    ],
    // ...match one of these glob patterns.
    "topics": [
        "test-*",
        "ci-*",
        "k8s"
    ]
}

{
    "type": "github",
    // Include all repos in my-org...
    "orgs": [
        "my-org"
    ],
    // ...except:
    "exclude": {
        // repos that are archived
        "archived": true,
        // repos that are forks
        "forks": true,
        // repos that match these glob patterns
        "repos": [
            "my-org/repo1",
            "my-org/repo2",
            "my-org/sub-org-1/**",
            "my-org/sub-org-*/**"
        ],
        "size": {
            // repos that are less than 1MB (in bytes)...
            "min": 1048576,
            // or repos greater than 100MB (in bytes)
            "max": 104857600 
        },
        // repos with topics that match these glob patterns
        "topics": [
            "test-*",
            "ci"
        ]
    }
}

Authenticating with GitHub

In order to index private repositories, you’ll need to generate an access token and provide it to Sourcebot. GitHub provides two types of access tokens:

Fine-grained personal access tokens

Create a new fine-grained PAT here. First, select the resource owner and the repositories that you want Sourcebot to have access to. Next, under “Repository permissions”, select the Contents and Metadata permissions with Read-only access. The permissions should look like the following: (image: GitHub PAT Scope). See the GitHub docs for details.
Personal access tokens (classic): Create a new PAT here and make sure you select the repo scope: (image: GitHub PAT Scope). See the GitHub docs for details.
Next, provide the access token via the token property, either as an environment variable or a secret:
  1. Add the token property to your connection config:
{
    "type": "github",
    "token": {
        // note: this env var can be named anything. It
        // doesn't need to be `GITHUB_TOKEN`.
        "env": "GITHUB_TOKEN"
    }
    // .. rest of config ..
}
  2. Pass this environment variable each time you run Sourcebot:
docker run \
    -e GITHUB_TOKEN=<PAT> \
    /* additional args */ \
    ghcr.io/sourcebot-dev/sourcebot:latest

Connecting to a custom GitHub host

To connect to a GitHub host other than github.com, add the url property to your config:
{
    "type": "github",
    "url": "https://github.example.com"
    // .. rest of config ..
}

Schema reference

schemas/v3/github.json
{
  "$schema": "http://json-schema.org/draft-07/schema#",
  "type": "object",
  "title": "GithubConnectionConfig",
  "properties": {
    "type": {
      "const": "github",
      "description": "GitHub Configuration"
    },
    "token": {
      "description": "A Personal Access Token (PAT).",
      "examples": [
        {
          "secret": "SECRET_KEY"
        }
      ],
      "anyOf": [
        {
          "type": "object",
          "properties": {
            "secret": {
              "type": "string",
              "description": "The name of the secret that contains the token."
            }
          },
          "required": [
            "secret"
          ],
          "additionalProperties": false
        },
        {
          "type": "object",
          "properties": {
            "env": {
              "type": "string",
              "description": "The name of the environment variable that contains the token. Only supported in declarative connection configs."
            }
          },
          "required": [
            "env"
          ],
          "additionalProperties": false
        }
      ]
    },
    "url": {
      "type": "string",
      "format": "url",
      "default": "https://github.com",
      "description": "The URL of the GitHub host. Defaults to https://github.com",
      "examples": [
        "https://github.com",
        "https://github.example.com"
      ],
      "pattern": "^https?:\\/\\/[^\\s/$.?#].[^\\s]*$"
    },
    "users": {
      "type": "array",
      "items": {
        "type": "string",
        "pattern": "^[\\w.-]+$"
      },
      "default": [],
      "examples": [
        [
          "torvalds",
          "DHH"
        ]
      ],
      "description": "List of users to sync with. All repositories that the user owns will be synced, unless explicitly defined in the `exclude` property."
    },
    "orgs": {
      "type": "array",
      "items": {
        "type": "string",
        "pattern": "^[\\w.-]+$"
      },
      "default": [],
      "examples": [
        [
          "my-org-name"
        ],
        [
          "sourcebot-dev",
          "commaai"
        ]
      ],
      "description": "List of organizations to sync with. All repositories in the organization visible to the provided `token` (if any) will be synced, unless explicitly defined in the `exclude` property."
    },
    "repos": {
      "type": "array",
      "items": {
        "type": "string",
        "pattern": "^[\\w.-]+\\/[\\w.-]+$"
      },
      "default": [],
      "description": "List of individual repositories to sync with. Expected to be formatted as '{orgName}/{repoName}' or '{userName}/{repoName}'."
    },
    "topics": {
      "type": "array",
      "items": {
        "type": "string"
      },
      "minItems": 1,
      "default": [],
      "description": "List of repository topics to include when syncing. Only repositories that match at least one of the provided `topics` will be synced. If not specified, all repositories will be synced, unless explicitly defined in the `exclude` property. Glob patterns are supported.",
      "examples": [
        [
          "docs",
          "core"
        ]
      ]
    },
    "exclude": {
      "type": "object",
      "properties": {
        "forks": {
          "type": "boolean",
          "default": false,
          "description": "Exclude forked repositories from syncing."
        },
        "archived": {
          "type": "boolean",
          "default": false,
          "description": "Exclude archived repositories from syncing."
        },
        "repos": {
          "type": "array",
          "items": {
            "type": "string"
          },
          "default": [],
          "description": "List of individual repositories to exclude from syncing. Glob patterns are supported."
        },
        "topics": {
          "type": "array",
          "items": {
            "type": "string"
          },
          "default": [],
          "description": "List of repository topics to exclude when syncing. Repositories that match one of the provided `topics` will be excluded from syncing. Glob patterns are supported.",
          "examples": [
            [
              "tests",
              "ci"
            ]
          ]
        },
        "size": {
          "type": "object",
          "description": "Exclude repositories based on their disk usage. Note: the disk usage is calculated by GitHub and may not reflect the actual disk usage when cloned.",
          "properties": {
            "min": {
              "type": "integer",
              "description": "Minimum repository size (in bytes) to sync (inclusive). Repositories less than this size will be excluded from syncing."
            },
            "max": {
              "type": "integer",
              "description": "Maximum repository size (in bytes) to sync (inclusive). Repositories greater than this size will be excluded from syncing."
            }
          },
          "additionalProperties": false
        }
      },
      "additionalProperties": false
    },
    "revisions": {
      "type": "object",
      "description": "The revisions (branches, tags) that should be included when indexing. The default branch (HEAD) is always indexed. A maximum of 64 revisions can be indexed, with any additional revisions being ignored.",
      "properties": {
        "branches": {
          "type": "array",
          "description": "List of branches to include when indexing. For a given repo, only the branches that exist on the repo's remote *and* match at least one of the provided `branches` will be indexed. The default branch (HEAD) is always indexed. Glob patterns are supported. A maximum of 64 branches can be indexed, with any additional branches being ignored.",
          "items": {
            "type": "string"
          },
          "examples": [
            [
              "main",
              "release/*"
            ],
            [
              "**"
            ]
          ],
          "default": []
        },
        "tags": {
          "type": "array",
          "description": "List of tags to include when indexing. For a given repo, only the tags that exist on the repo's remote *and* match at least one of the provided `tags` will be indexed. Glob patterns are supported. A maximum of 64 tags can be indexed, with any additional tags being ignored.",
          "items": {
            "type": "string"
          },
          "examples": [
            [
              "latest",
              "v2.*.*"
            ],
            [
              "**"
            ]
          ],
          "default": []
        }
      },
      "additionalProperties": false
    }
  },
  "required": [
    "type"
  ],
  "additionalProperties": false
}