dump_github_contributors.ps1 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123
  1. #!/usr/bin/env pwsh
  # Directory that contains this script; contribs_shared.ps1 is loaded from the same place.
  $scriptDir = Split-Path -Path $MyInvocation.MyCommand.Definition -Parent

  # Dot-source the shared definitions so they land in this script's scope.
  # NOTE(review): presumably this is where $add, $ignore and $replacements (used at the
  # bottom of this script) come from -- confirm against contribs_shared.ps1.
  . (Join-Path $scriptDir 'contribs_shared.ps1')

  # Fail fast when no usable token is available. An empty or whitespace-only value counts
  # as missing: it can show up when a CI secret is not exposed to the job, and it would
  # otherwise be sent as a bare "Bearer " header and rejected by the API much later.
  if ([string]::IsNullOrWhiteSpace($env:GITHUB_TOKEN))
  {
      throw "A GitHub API token is required to run this script properly without being rate limited. If you're a user, generate a personal access token and use that. If you're running this in a GitHub action, make sure you expose the GITHUB_TOKEN secret as an environment variable."
  }
  function load_contribs([string] $repo)
  {
      <#
      .SYNOPSIS
      Downloads the full contributor list of a GitHub repository and tries to recover the
      GitHub login of contributors that the API reports as anonymous.

      .PARAMETER repo
      Repository in "owner/name" form, e.g. "space-wizards/space-station-14".

      .OUTPUTS
      The contributor objects returned by the API, in API order. Anonymous entries whose
      account could be identified gain a `login` note property; all other entries are
      returned unchanged.

      .NOTES
      Authenticates with $env:GITHUB_TOKEN. Performs one extra API request per anonymous
      contributor whose name might be a GitHub username.
      #>

      $headers = @{
          Authorization = "Bearer $env:GITHUB_TOKEN"
      }

      # https://developer.github.com/enterprise/2.8/v3/repos/#list-contributors
      # We use the ?anon=1 query param for reasons explained below.
      # The query string is only built for the first request: every following page is
      # fetched through the `next` link exactly as GitHub returned it, so the parameters
      # are not appended a second time.
      $url = "https://api.github.com/repos/{0}/contributors?per_page=100&anon=1" -f $repo

      $r = [System.Collections.Generic.List[object]]::new()
      while ($null -ne $url)
      {
          $resp = Invoke-WebRequest -Uri $url -Headers $headers
          $url = $resp.RelationLink.next
          # foreach copes with an empty page ($null), a single object and an array alike.
          foreach ($entry in (ConvertFrom-Json $resp.Content))
          {
              $r.Add($entry)
          }
      }

      # After collecting all the paginated data, we still aren't done.
      # GitHub's API has a hard cap of 500 email addresses per repo which it will collate.
      # SS14 has been past this limit for quite some time, so GitHub stops reporting accounts,
      # starting with those that have fewer contributions, as distinct users with a `login` field.
      #
      # To remedy this, we first use the ?anon=1 parameter to force GitHub to include all
      # committer emails, even those it did not attach to a GitHub account.
      #
      # Normally that is exactly what we do not want -- we use this API because we only want
      # committers with valid GitHub accounts, otherwise the contributor log gets polluted with
      # random aliases and names that people don't use.
      #
      # So, the approach is:
      # 1) Go over the list and look at entries that only have a `name` and `email` field
      #    ('anonymous' contributors) and whose account doesn't already appear.
      # 2) Check whether the email ends with `@users.noreply.github.com`.
      #    - GitHub uses `(numbers)+(username)@users.noreply.github.com` (or, for older
      #      accounts, `(username)@users.noreply.github.com`) when commits are made through
      #      someone's GitHub account without another email attached.
      # 3) If the email has this form, assume this is one of the 'missing' contributors and
      #    extract their GitHub username from it.
      # 4) If the email does not have this form, we unfortunately still have to check whether
      #    the name is a valid GitHub user, because GitHub might have anonymized them anyway.
      #
      #    It's possible that the `name` is a valid GitHub user by coincidence and that they
      #    aren't actually the contributor. There is not much we can do about that; it is not
      #    common and attributing is more likely to be right than wrong.
      # 5) Add a `login` field holding the real username and let the rest of the code do its job.

      # Logins seen so far, compared case-insensitively like the `-contains` lookups this
      # replaces. Kept up to date while logins are recovered below.
      $knownLogins = [System.Collections.Generic.HashSet[string]]::new([System.StringComparer]::OrdinalIgnoreCase)
      foreach ($contributor in $r)
      {
          if ($contributor.login)
          {
              [void] $knownLogins.Add([string] $contributor.login)
          }
      }

      foreach ($contributor in $r)
      {
          # Only anonymous entries (no login) that carry a name can be recovered.
          if ($null -ne $contributor.login -or $null -eq $contributor.name)
          {
              continue
          }

          $name = [string] $contributor.name

          # Steps 2 + 3: noreply address, with or without the numeric id prefix.
          if ($contributor.email -match '^(?:\d+\+)?([^@+]+)@users\.noreply\.github\.com$')
          {
              $username = $Matches[1]
              # Prefer the `name` when it is the same username, since it carries the
              # proper casing. Otherwise keep the username as it appears in the email.
              if ($name -ieq $username)
              {
                  $username = $name
              }

              # HashSet.Add returns $false when the login is already known.
              if ($knownLogins.Add($username))
              {
                  $contributor | Add-Member -MemberType NoteProperty -Name "login" -Value $username
              }
              continue
          }

          # Step 4: anonymous, no GitHub email. A valid GitHub username can't contain a
          # space, so only names without one are worth a lookup.
          if ([string]::IsNullOrWhiteSpace($name) -or $name.Contains(" ") -or $knownLogins.Contains($name))
          {
              continue
          }

          # The name comes from commit metadata, so escape it before putting it in a URL;
          # characters such as '?', '#' or '/' would otherwise change which resource is queried.
          $userUrl = "https://api.github.com/users/{0}" -f ([uri]::EscapeDataString($name))
          try
          {
              $userResp = Invoke-WebRequest -Uri $userUrl -Headers $headers
              $userJ = ConvertFrom-Json $userResp.Content
              if ($userJ.login)
              {
                  $contributor | Add-Member -MemberType NoteProperty -Name "login" -Value $userJ.login
                  [void] $knownLogins.Add([string] $userJ.login)
              }
          }
          catch
          {
              # A 404 simply means there is no such user: leave the entry anonymous.
              # Anything else (rate limit, auth, network) is reported, but stays non-fatal
              # so that a single failed lookup does not abort the whole run.
              $status = [int] $_.Exception.Response.StatusCode
              if ($status -ne 404)
              {
                  Write-Warning ("Lookup of GitHub user '{0}' failed: {1}" -f $name, $_.Exception.Message)
              }
          }
      }

      return $r
  }
  # Fetch the contributors of the engine and the content repository.
  $engineJson = load_contribs "space-wizards/RobustToolbox"
  $contentJson = load_contribs "space-wizards/space-station-14"

  # Build the final comma-separated credit line and write it to the output stream.
  # NOTE(review): $add, $ignore and $replacements are not defined in this script;
  # presumably they come from the dot-sourced contribs_shared.ps1 -- confirm there.
  #
  # - Every operand is wrapped in @() so that `+` is always array concatenation, even
  #   when an operand is a single string (one contributor) or $null.
  # - Entries without a login are dropped before they are used as a lookup key.
  # - Names with a truthy entry in $ignore are removed; names with an entry in
  #   $replacements are swapped for it.
  # - De-duplication happens last (Sort-Object -Unique, case-insensitive), so two
  #   aliases replaced by the same name, or the same login in different casing, are
  #   only listed once.
  @($engineJson.login) + @($contentJson.login) + @($add) `
      | Where-Object { $_ -and -not $ignore[$_] } `
      | ForEach-Object { if ($null -eq $replacements[$_]) { $_ } else { $replacements[$_] } } `
      | Sort-Object -Unique `
      | Join-String -Separator ", "