From 98cee3ee61eb8708a9cfc32f0cb5e114395fab19 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 27 May 2026 15:08:02 -0400 Subject: [PATCH 01/26] rename augur_data model file to data Signed-off-by: Adrian Edwards --- collectoss/application/db/models/__init__.py | 2 +- collectoss/application/db/models/{augur_data.py => data.py} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename collectoss/application/db/models/{augur_data.py => data.py} (100%) diff --git a/collectoss/application/db/models/__init__.py b/collectoss/application/db/models/__init__.py index bed0e4c8e..11c6b38fe 100644 --- a/collectoss/application/db/models/__init__.py +++ b/collectoss/application/db/models/__init__.py @@ -1,4 +1,4 @@ -from collectoss.application.db.models.augur_data import ( +from collectoss.application.db.models.data import ( ChaossMetricStatus, ChaossUser, ContributorAffiliation, diff --git a/collectoss/application/db/models/augur_data.py b/collectoss/application/db/models/data.py similarity index 100% rename from collectoss/application/db/models/augur_data.py rename to collectoss/application/db/models/data.py From 86e260bec65fc02d0b9939f957e6c261bfb86abc Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 27 May 2026 15:09:34 -0400 Subject: [PATCH 02/26] rename augur_data to collection_data in schema arguments Signed-off-by: Adrian Edwards --- collectoss/application/db/models/data.py | 42 ++++++++++++------------ 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/collectoss/application/db/models/data.py b/collectoss/application/db/models/data.py index 7ea85eefc..a4e6b7e53 100644 --- a/collectoss/application/db/models/data.py +++ b/collectoss/application/db/models/data.py @@ -54,7 +54,7 @@ nullable=False, server_default=text("CURRENT_TIMESTAMP"), ), - schema="augur_data", + schema="collection_data", ) Index('repos_id', t_analysis_log.c.repos_id) @@ -337,7 +337,7 @@ def from_github(cls, contributor, tool_source, tool_version, data_source): ), 
Index("repo_id,email_copy_1", "repo_id", "email"), Index("repo_id,affiliation_copy_1", "repo_id", "affiliation"), - schema="augur_data", + schema="collection_data", ) @@ -363,7 +363,7 @@ def from_github(cls, contributor, tool_source, tool_version, data_source): ), Index("projects_id,email_copy_1", "repo_group_id", "email"), Index("projects_id,affiliation_copy_1", "repo_group_id", "affiliation"), - schema="augur_data", + schema="collection_data", ) @@ -394,7 +394,7 @@ def from_github(cls, contributor, tool_source, tool_version, data_source): Index( "projects_id,year,affiliation_copy_1", "repo_group_id", "year", "affiliation" ), - schema="augur_data", + schema="collection_data", ) @@ -423,7 +423,7 @@ def from_github(cls, contributor, tool_source, tool_version, data_source): Index("projects_id,email", "repo_group_id", "email"), Index("projects_id,year,email", "repo_group_id", "year", "email"), Index("projects_id,year,affiliation", "repo_group_id", "year", "affiliation"), - schema="augur_data", + schema="collection_data", ) @@ -452,7 +452,7 @@ def from_github(cls, contributor, tool_source, tool_version, data_source): Index("repo_id,year,affiliation_copy_1", "repo_id", "year", "affiliation"), Index("repo_id,affiliation_copy_2", "repo_id", "affiliation"), Index("repo_id,email_copy_2", "repo_id", "email"), - schema="augur_data", + schema="collection_data", ) @@ -481,7 +481,7 @@ def from_github(cls, contributor, tool_source, tool_version, data_source): Index("repo_id,email", "repo_id", "email"), Index("repo_id,year,email", "repo_id", "year", "email"), Index("repo_id,year,affiliation", "repo_id", "year", "affiliation"), - schema="augur_data", + schema="collection_data", ) @@ -530,7 +530,7 @@ class Platform(Base): pltfrm_id = Column( BigInteger, - Sequence('platform_pltfrm_id_seq', start=25430, schema="augur_data"), + Sequence('platform_pltfrm_id_seq', start=25430, schema="collection_data"), primary_key=True, 
server_default=text("nextval('augur_data.platform_pltfrm_id_seq'::regclass)"), ) @@ -622,7 +622,7 @@ def get_by_name(session, rg_name): ), Index("repos_id,status", "repos_id", "status"), Index("repos_id,statusops", "repos_id", "status"), - schema="augur_data", + schema="collection_data", ) @@ -678,7 +678,7 @@ class TopicWord(Base): server_default=text("CURRENT_TIMESTAMP"), ), Index("type,projects_id", "type", "repo_group_id"), - schema="augur_data", + schema="collection_data", ) @@ -710,7 +710,7 @@ class UtilityLog(Base): id = Column( BigInteger, - Sequence('utility_log_id_seq1', start=1, schema="augur_data"), + Sequence('utility_log_id_seq1', start=1, schema="collection_data"), primary_key=True, server_default=text("nextval('augur_data.utility_log_id_seq1'::regclass)"), ) @@ -728,7 +728,7 @@ class UtilityLog(Base): Column( "working_commit", String(40), server_default=text("'NULL'::character varying") ), - schema="augur_data", + schema="collection_data", ) @@ -1326,7 +1326,7 @@ class Commit(Base): cmt_id = Column( BigInteger, - Sequence('commits_cmt_id_seq', start=25430, schema="augur_data"), + Sequence('commits_cmt_id_seq', start=25430, schema="collection_data"), primary_key=True, server_default=text("nextval('augur_data.commits_cmt_id_seq'::regclass)"), ) @@ -1411,7 +1411,7 @@ class CommitMessage(Base): cmt_msg_id = Column( BigInteger, - Sequence('commits_cmt_id_seq', start=25430, schema="augur_data"), + Sequence('commits_cmt_id_seq', start=25430, schema="collection_data"), primary_key=True, server_default=text("nextval('augur_data.commits_cmt_id_seq'::regclass)"), ) @@ -1447,7 +1447,7 @@ class Issue(Base): issue_id = Column( BigInteger, - Sequence('issue_seq', start=31000, schema="augur_data"), + Sequence('issue_seq', start=31000, schema="collection_data"), primary_key=True, server_default=text("nextval('augur_data.issue_seq'::regclass)"), ) @@ -1513,7 +1513,7 @@ class Library(Base): library_id = Column( BigInteger, - Sequence('libraries_library_id_seq', 
start=25430, schema="augur_data"), + Sequence('libraries_library_id_seq', start=25430, schema="collection_data"), primary_key=True, server_default=text("nextval('augur_data.libraries_library_id_seq'::regclass)"), ) @@ -1597,7 +1597,7 @@ class Message(Base): msg_id = Column( BigInteger, - Sequence('message_msg_id_seq', start=25430, schema="augur_data"), + Sequence('message_msg_id_seq', start=25430, schema="collection_data"), primary_key=True, server_default=text("nextval('augur_data.message_msg_id_seq'::regclass)"), ) @@ -1887,7 +1887,7 @@ class Release(Base): release_id = Column( CHAR(256), - Sequence('releases_release_id_seq', start=1, schema="augur_data"), + Sequence('releases_release_id_seq', start=1, schema="collection_data"), primary_key=True, server_default=text("nextval('augur_data.releases_release_id_seq'::regclass)"), ) @@ -2149,7 +2149,7 @@ class RepoInsight(Base): ri_id = Column( BigInteger, - Sequence('repo_insights_ri_id_seq', start=25430, schema="augur_data"), + Sequence('repo_insights_ri_id_seq', start=25430, schema="collection_data"), primary_key=True, server_default=text("nextval('augur_data.repo_insights_ri_id_seq'::regclass)"), ) @@ -2268,7 +2268,7 @@ class RepoMeta(Base): ) rmeta_id = Column( BigInteger, - Sequence('repo_meta_rmeta_id_seq', start=25430, schema="augur_data"), + Sequence('repo_meta_rmeta_id_seq', start=25430, schema="collection_data"), primary_key=True, nullable=False, server_default=text("nextval('augur_data.repo_meta_rmeta_id_seq'::regclass)"), @@ -2312,7 +2312,7 @@ class RepoStat(Base): ) rstat_id = Column( BigInteger, - Sequence('repo_stats_rstat_id_seq', start=25430, schema="augur_data"), + Sequence('repo_stats_rstat_id_seq', start=25430, schema="collection_data"), primary_key=True, nullable=False, server_default=text("nextval('augur_data.repo_stats_rstat_id_seq'::regclass)"), From 9d907f2d9c3d33ef5af87d39e4e60ffc5d58d5ba Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 27 May 2026 15:10:56 -0400 Subject: [PATCH 
03/26] rename augur_data to collection_data in table arguments Signed-off-by: Adrian Edwards --- .../application/db/models/augur_operations.py | 2 +- collectoss/application/db/models/data.py | 138 +++++++++--------- 2 files changed, 70 insertions(+), 70 deletions(-) diff --git a/collectoss/application/db/models/augur_operations.py b/collectoss/application/db/models/augur_operations.py index 760ea6c1a..5f25a92f4 100644 --- a/collectoss/application/db/models/augur_operations.py +++ b/collectoss/application/db/models/augur_operations.py @@ -221,7 +221,7 @@ class WorkerSettingsFacade(Base): class BadgingDEI(Base): __tablename__ = 'dei_badging' - __table_args__ = {"schema": "augur_data"} + __table_args__ = {"schema": "collection_data"} id = Column(Integer, primary_key=True, nullable=False) badging_id = Column(Integer, nullable=False) level = Column(String, nullable=False) diff --git a/collectoss/application/db/models/data.py b/collectoss/application/db/models/data.py index a4e6b7e53..8d6dd17a5 100644 --- a/collectoss/application/db/models/data.py +++ b/collectoss/application/db/models/data.py @@ -63,7 +63,7 @@ class ChaossMetricStatus(Base): __tablename__ = "chaoss_metric_status" __table_args__ = { - "schema": "augur_data", + "schema": "collection_data", "comment": "This table used to track CHAOSS Metric implementations, but due to the constantly changing location of that information, it is for the moment not actively populated. ", } @@ -97,7 +97,7 @@ class ChaossMetricStatus(Base): class ChaossUser(Base): __tablename__ = "chaoss_user" - __table_args__ = {"schema": "augur_data"} + __table_args__ = {"schema": "collection_data"} chaoss_id = Column( BigInteger, @@ -122,7 +122,7 @@ class ChaossUser(Base): class ContributorAffiliation(Base): __tablename__ = "contributor_affiliations" __table_args__ = { - "schema": "augur_data", + "schema": "collection_data", "comment": "This table exists outside of relations with other tables. 
The purpose is to provide a dynamic, owner maintained (and collectoss augmented) list of affiliations. This table is processed in affiliation information in the DM_ tables generated when CollectOSS is finished counting commits using the Facade Worker. ", } @@ -178,7 +178,7 @@ class Contributor(Base): Index("login-contributor-idx", "cntrb_login"), { - "schema": "augur_data", + "schema": "collection_data", "comment": "For GitHub, this should be repeated from gh_login. for other systems, it should be that systems login. \nGithub now allows a user to change their login name, but their user id remains the same in this case. So, the natural key is the combination of id and login, but there should never be repeated logins. ", }, ) @@ -487,7 +487,7 @@ def from_github(cls, contributor, tool_source, tool_version, data_source): class Exclude(Base): __tablename__ = "exclude" - __table_args__ = {"schema": "augur_data"} + __table_args__ = {"schema": "collection_data"} id = Column(Integer, primary_key=True) projects_id = Column(Integer, nullable=False) @@ -497,7 +497,7 @@ class Exclude(Base): class LstmAnomalyModel(Base): __tablename__ = "lstm_anomaly_models" - __table_args__ = {"schema": "augur_data"} + __table_args__ = {"schema": "collection_data"} model_id = Column( BigInteger, @@ -525,7 +525,7 @@ class Platform(Base): __tablename__ = "platform" __table_args__ = ( Index("plat", "pltfrm_id", unique=True), - {"schema": "augur_data"} + {"schema": "collection_data"} ) pltfrm_id = Column( @@ -548,7 +548,7 @@ class RepoGroup(Base): __table_args__ = ( Index("rgidm", "repo_group_id", unique=True), Index("rgnameindex", "rg_name"), - {"schema": "augur_data", + {"schema": "collection_data", "comment": "rg_type is intended to be either a GitHub Organization or a User Created Repo Group. 
"}, ) @@ -628,7 +628,7 @@ def get_by_name(session, rg_name): class Settings(Base): __tablename__ = "settings" - __table_args__ = {"schema": "augur_data"} + __table_args__ = {"schema": "collection_data"} id = Column(Integer, primary_key=True) setting = Column(String(32), nullable=False) @@ -640,7 +640,7 @@ class Settings(Base): class TopicWord(Base): __tablename__ = "topic_words" - __table_args__ = {"schema": "augur_data"} + __table_args__ = {"schema": "collection_data"} topic_words_id = Column( BigInteger, @@ -684,7 +684,7 @@ class TopicWord(Base): class UnresolvedCommitEmail(Base): __tablename__ = "unresolved_commit_emails" - __table_args__ = {"schema": "augur_data"} + __table_args__ = {"schema": "collection_data"} email_unresolved_id = Column( BigInteger, @@ -706,7 +706,7 @@ class UnresolvedCommitEmail(Base): class UtilityLog(Base): __tablename__ = "utility_log" - __table_args__ = {"schema": "augur_data"} + __table_args__ = {"schema": "collection_data"} id = Column( BigInteger, @@ -737,7 +737,7 @@ class ContributorRepo(Base): __table_args__ = ( UniqueConstraint("event_id", "tool_version"), { - "schema": "augur_data", + "schema": "collection_data", "comment": 'Developed in Partnership with Andrew Brain.', }, ) @@ -782,7 +782,7 @@ class ContributorsAlias(Base): __table_args__ = ( UniqueConstraint("cntrb_id","alias_email", name="cntrb-email-insert-unique"), { - "schema": "augur_data", + "schema": "collection_data", "comment": "Every open source user may have more than one email used to make contributions over time. CollectOSS selects the first email it encounters for a user as its “canonical_email”. \n\nThe canonical_email is also added to the contributors_aliases table, with the canonical_email and alias_email being identical. 
Using this strategy, an email search will only need to join the alias table for basic email information, and can then more easily map the canonical email from each alias row to the same, more detailed information in the contributors table for a user. ", }, ) @@ -838,7 +838,7 @@ class Repo(Base): Index("therepo", "repo_id", unique=True), { - "schema": "augur_data", + "schema": "collection_data", "comment": "This table is a combination of the columns in Facade’s repo table and GHTorrent’s projects table. ", }, ) @@ -1192,7 +1192,7 @@ class HistoricalRepoURLs(Base): """ __tablename__ = "historical_repo_urls" - __table_args__ = {"schema": "augur_data"} + __table_args__ = {"schema": "collection_data"} repo_id = Column(ForeignKey("augur_data.repo.repo_id"), primary_key=True) git_url = Column(String, primary_key=True) @@ -1200,7 +1200,7 @@ class HistoricalRepoURLs(Base): class RepoTestCoverage(Base): __tablename__ = "repo_test_coverage" - __table_args__ = {"schema": "augur_data"} + __table_args__ = {"schema": "collection_data"} repo_id = Column( ForeignKey("augur_data.repo.repo_id"), @@ -1231,7 +1231,7 @@ class RepoTestCoverage(Base): class RepoGroupInsight(Base): __tablename__ = "repo_group_insights" __table_args__ = { - "schema": "augur_data", + "schema": "collection_data", "comment": 'This table is output from an analytical worker. It runs through the different metrics on a REPOSITORY_GROUP and identifies the five to ten most “interesting” metrics as defined by some kind of delta or other factor. The algorithm is going to evolve. \n\nWorker Design Notes: The idea is that the "insight worker" will scan through a bunch of active metrics or "synthetic metrics" to list the most important insights. 
', } @@ -1266,7 +1266,7 @@ class RepoGroupsListServe(Base): __table_args__ = ( UniqueConstraint("rgls_id", "repo_group_id"), Index("lister", "rgls_id", "repo_group_id", unique=True), - {"schema": "augur_data"}, + {"schema": "collection_data"}, ) rgls_id = Column( @@ -1319,7 +1319,7 @@ class Commit(Base): Index("repo_id,commit", "repo_id", "cmt_commit_hash"), { - "schema": "augur_data", + "schema": "collection_data", "comment": "Commits.\nEach row represents changes to one FILE within a single commit. So you will encounter multiple rows per commit hash in many cases. ", }, ) @@ -1404,7 +1404,7 @@ class CommitMessage(Base): __table_args__ = ( UniqueConstraint("repo_id","cmt_hash", name="commit-message-insert-unique"), { - "schema": "augur_data", + "schema": "collection_data", "comment": "This table holds commit messages", } ) @@ -1442,7 +1442,7 @@ class Issue(Base): UniqueConstraint("repo_id", "gh_issue_id"), UniqueConstraint("issue_url", name="issue-insert-unique"), - {"schema": "augur_data"}, + {"schema": "collection_data"}, ) issue_id = Column( @@ -1509,7 +1509,7 @@ class Issue(Base): class Library(Base): __tablename__ = "libraries" - __table_args__ = {"schema": "augur_data"} + __table_args__ = {"schema": "collection_data"} library_id = Column( BigInteger, @@ -1551,7 +1551,7 @@ class Library(Base): class LstmAnomalyResult(Base): __tablename__ = "lstm_anomaly_results" - __table_args__ = {"schema": "augur_data"} + __table_args__ = {"schema": "collection_data"} result_id = Column( BigInteger, @@ -1592,7 +1592,7 @@ class Message(Base): Index("msg-cntrb-id-idx", "cntrb_id"), Index("platformgrouper", "msg_id", "pltfrm_id"), Index("messagegrouper", "msg_id", "rgls_id", unique=True), - {"schema": "augur_data"}, + {"schema": "collection_data"}, ) msg_id = Column( @@ -1661,7 +1661,7 @@ class Message(Base): class MessageAnalysisSummary(Base): __tablename__ = "message_analysis_summary" __table_args__ = { - "schema": "augur_data", + "schema": "collection_data", "comment": "In 
a relationally perfect world, we would have a table called “message_analysis_run” the incremented the “worker_run_id” for both message_analysis and message_analysis_summary. For now, we decided this was overkill. ", } @@ -1701,7 +1701,7 @@ class MessageAnalysisSummary(Base): class MessageSentimentSummary(Base): __tablename__ = "message_sentiment_summary" __table_args__ = { - "schema": "augur_data", + "schema": "collection_data", "comment": "In a relationally perfect world, we would have a table called “message_sentiment_run” the incremented the “worker_run_id” for both message_sentiment and message_sentiment_summary. For now, we decided this was overkill. ", } @@ -1749,7 +1749,7 @@ class PullRequest(Base): "pull_requests_idx_repo_id_data_datex", "repo_id", "data_collection_date" ), Index("pr_ID_prs_table", "pull_request_id"), - {"schema": "augur_data"}, + {"schema": "collection_data"}, ) pull_request_id = Column( @@ -1883,7 +1883,7 @@ def from_github(cls, pr, repo_id, tool_source, tool_version): class Release(Base): __tablename__ = "releases" - __table_args__ = {"schema": "augur_data"} + __table_args__ = {"schema": "collection_data"} release_id = Column( CHAR(256), @@ -1916,7 +1916,7 @@ class Release(Base): class RepoBadging(Base): __tablename__ = "repo_badging" __table_args__ = { - "schema": "augur_data", + "schema": "collection_data", "comment": "This will be collected from the LF’s Badging API\nhttps://bestpractices.coreinfrastructure.org/projects.json?pq=https%3A%2F%2Fgithub.com%2Fchaoss%2Faugur\n", } @@ -1960,7 +1960,7 @@ def insert(session, repo_id: int, data: dict) -> dict: class RepoClusterMessage(Base): __tablename__ = "repo_cluster_messages" - __table_args__ = {"schema": "augur_data"} + __table_args__ = {"schema": "collection_data"} msg_cluster_id = Column( BigInteger, @@ -1988,7 +1988,7 @@ class RepoDependency(Base): __table_args__ = ( UniqueConstraint("repo_id","dep_name","data_collection_date", name="deps-insert-unique"), { - "schema": "augur_data", + 
"schema": "collection_data", "comment": "Contains the dependencies for a repo." }, ) @@ -2021,7 +2021,7 @@ class RepoDepsLibyear(Base): __tablename__ = "repo_deps_libyear" __table_args__ = ( UniqueConstraint("repo_id","name", "data_collection_date", name="deps-libyear-insert-unique"), - {"schema": "augur_data"} + {"schema": "collection_data"} ) repo_deps_libyear_id = Column( @@ -2056,7 +2056,7 @@ class RepoDepsScorecard(Base): __tablename__ = "repo_deps_scorecard" __table_args__ = ( UniqueConstraint("repo_id","name", "data_collection_date", name="deps_scorecard_new_unique"), - {"schema": "augur_data"} + {"schema": "collection_data"} ) repo_deps_scorecard_id = Column( @@ -2087,7 +2087,7 @@ class RepoInfo(Base): __table_args__ = ( Index("repo_info_idx_repo_id_data_date_1x", "repo_id", "data_collection_date"), Index("repo_info_idx_repo_id_data_datex", "repo_id", "data_collection_date"), - {"schema": "augur_data"}, + {"schema": "collection_data"}, ) repo_info_id = Column( @@ -2143,7 +2143,7 @@ class RepoInfo(Base): class RepoInsight(Base): __tablename__ = "repo_insights" __table_args__ = { - "schema": "augur_data", + "schema": "collection_data", "comment": 'This table is output from an analytical worker. It runs through the different metrics on a repository and identifies the five to ten most “interesting” metrics as defined by some kind of delta or other factor. The algorithm is going to evolve. \n\nWorker Design Notes: The idea is that the "insight worker" will scan through a bunch of active metrics or "synthetic metrics" to list the most important insights. 
', } @@ -2178,7 +2178,7 @@ class RepoInsightsRecord(Base): __tablename__ = "repo_insights_records" __table_args__ = ( Index("dater", "ri_date"), - {"schema": "augur_data"} + {"schema": "collection_data"} ) ri_id = Column( @@ -2223,7 +2223,7 @@ class RepoLabor(Base): __table_args__ = ( UniqueConstraint("repo_id", "rl_analysis_date", "file_path", "file_name"), { - "schema": "augur_data", + "schema": "collection_data", "comment": "repo_labor is a derivative of tables used to store scc code and complexity counting statistics that are inputs to labor analysis, which are components of CHAOSS value metric calculations. ", }, ) @@ -2261,7 +2261,7 @@ class RepoLabor(Base): class RepoMeta(Base): __tablename__ = "repo_meta" - __table_args__ = {"schema": "augur_data", "comment": "Project Languages"} + __table_args__ = {"schema": "collection_data", "comment": "Project Languages"} repo_id = Column( ForeignKey("augur_data.repo.repo_id"), primary_key=True, nullable=False @@ -2285,7 +2285,7 @@ class RepoMeta(Base): class RepoSbomScan(Base): __tablename__ = "repo_sbom_scans" - __table_args__ = {"schema": "augur_data"} + __table_args__ = {"schema": "collection_data"} rsb_id = Column( BigInteger, @@ -2305,7 +2305,7 @@ class RepoSbomScan(Base): class RepoStat(Base): __tablename__ = "repo_stats" - __table_args__ = {"schema": "augur_data", "comment": "Project Watchers"} + __table_args__ = {"schema": "collection_data", "comment": "Project Watchers"} repo_id = Column( ForeignKey("augur_data.repo.repo_id"), primary_key=True, nullable=False @@ -2329,7 +2329,7 @@ class RepoStat(Base): class RepoTopic(Base): __tablename__ = "repo_topic" - __table_args__ = {"schema": "augur_data"} + __table_args__ = {"schema": "collection_data"} repo_topic_id = Column( BigInteger, @@ -2356,7 +2356,7 @@ class CommitCommentRef(Base): __tablename__ = "commit_comment_ref" __table_args__ = ( Index("comment_id", "cmt_comment_src_id", "cmt_comment_id", "msg_id"), - {"schema": "augur_data"}, + {"schema": 
"collection_data"}, ) cmt_comment_id = Column( @@ -2413,7 +2413,7 @@ class CommitParent(Base): __table_args__ = ( Index("commit_parents_ibfk_1", "cmt_id"), Index("commit_parents_ibfk_2", "parent_id"), - {"schema": "augur_data"} + {"schema": "collection_data"} ) cmt_id = Column( @@ -2446,7 +2446,7 @@ class CommitParent(Base): class DiscourseInsight(Base): __tablename__ = "discourse_insights" __table_args__ = { - "schema": "augur_data", + "schema": "collection_data", "comment": "This table is populated by the “Discourse_Analysis_Worker”. It examines sequential discourse, using computational linguistic methods, to draw statistical inferences regarding the discourse in a particular comment thread. ", } @@ -2475,7 +2475,7 @@ class IssueAssignee(Base): __table_args__ = ( Index("issue-cntrb-assign-idx-1", "cntrb_id"), UniqueConstraint("issue_assignee_src_id", "issue_id", name="issue-assignee-insert-unique"), - {"schema": "augur_data"} + {"schema": "collection_data"} ) issue_assignee_id = Column( @@ -2535,7 +2535,7 @@ class IssueEvent(Base): Index("issue_events_ibfk_1", "issue_id"), Index("issue_events_ibfk_2", "cntrb_id"), - {"schema": "augur_data"}, + {"schema": "collection_data"}, ) event_id = Column( @@ -2620,7 +2620,7 @@ class IssueLabel(Base): __tablename__ = "issue_labels" __table_args__ = ( UniqueConstraint("label_src_id", "issue_id"), - {"schema": "augur_data"}, + {"schema": "collection_data"}, ) issue_label_id = Column( @@ -2677,7 +2677,7 @@ class IssueMessageRef(Base): __tablename__ = "issue_message_ref" __table_args__ = ( UniqueConstraint("issue_msg_ref_src_comment_id", "issue_id", name="issue-message-ref-insert-unique"), - {"schema": "augur_data"}, + {"schema": "collection_data"}, ) issue_msg_ref_id = Column( @@ -2739,7 +2739,7 @@ class LibraryDependency(Base): __tablename__ = "library_dependencies" __table_args__ = ( Index("REPO_DEP", "library_id"), - {"schema": "augur_data"} + {"schema": "collection_data"} ) lib_dependency_id = Column( @@ -2767,7 +2767,7 @@ 
class LibraryDependency(Base): class LibraryVersion(Base): __tablename__ = "library_version" - __table_args__ = {"schema": "augur_data"} + __table_args__ = {"schema": "collection_data"} library_version_id = Column( BigInteger, @@ -2793,7 +2793,7 @@ class LibraryVersion(Base): class MessageAnalysis(Base): __tablename__ = "message_analysis" - __table_args__ = {"schema": "augur_data"} + __table_args__ = {"schema": "collection_data"} msg_analysis_id = Column( BigInteger, @@ -2836,7 +2836,7 @@ class MessageAnalysis(Base): class MessageSentiment(Base): __tablename__ = "message_sentiment" - __table_args__ = {"schema": "augur_data"} + __table_args__ = {"schema": "collection_data"} msg_analysis_id = Column( BigInteger, @@ -2915,7 +2915,7 @@ class PullRequestAnalysis(Base): __table_args__ = ( Index("pr_anal_idx", pull_request_id), Index("probability_idx", merge_probability.desc().nullslast()), - {"schema": "augur_data"} + {"schema": "collection_data"} ) pull_request = relationship("PullRequest") @@ -2926,7 +2926,7 @@ class PullRequestAssignee(Base): __table_args__ = ( Index("pr_meta_cntrb-idx", "contrib_id"), UniqueConstraint("pull_request_id", "pr_assignee_src_id", name="assigniees-unique"), - {"schema": "augur_data"} + {"schema": "collection_data"} ) pr_assignee_map_id = Column( @@ -2987,7 +2987,7 @@ class PullRequestCommit(Base): __table_args__ = ( UniqueConstraint("pull_request_id", "repo_id", "pr_cmt_sha"), { - "schema": "augur_data", + "schema": "collection_data", "comment": "Pull request commits are an enumeration of each commit associated with a pull request. \nNot all pull requests are from a branch or fork into master. \nThe commits table intends to count only commits that end up in the master branch (i.e., part of the deployed code base for a project).\nTherefore, there will be commit “SHA”’s in this table that are no associated with a commit SHA in the commits table. \nIn cases where the PR is to the master branch of a project, you will find a match. 
In cases where the PR does not involve the master branch, you will not find a corresponding commit SHA in the commits table. This is expected. ", }, ) @@ -3044,7 +3044,7 @@ class PullRequestEvent(Base): UniqueConstraint("repo_id", "issue_event_src_id", name="pr_events_repo_id_event_src_id_unique"), UniqueConstraint("platform_id", "node_id", name="unique-pr-event-id"), UniqueConstraint("node_id", name="pr-unqiue-event"), - {"schema": "augur_data"}, + {"schema": "collection_data"}, ) pr_event_id = Column( @@ -3142,7 +3142,7 @@ class PullRequestFile(Base): Index("pr_id_pr_files","pull_request_id"), UniqueConstraint("pull_request_id", "repo_id", "pr_file_path", name="prfiles_unique"), { - "schema": "augur_data", + "schema": "collection_data", "comment": "Pull request commits are an enumeration of each commit associated with a pull request. \nNot all pull requests are from a branch or fork into master. \nThe commits table intends to count only commits that end up in the master branch (i.e., part of the deployed code base for a project).\nTherefore, there will be commit “SHA”’s in this table that are no associated with a commit SHA in the commits table. \nIn cases where the PR is to the master branch of a project, you will find a match. In cases where the PR does not involve the master branch, you will not find a corresponding commit SHA in the commits table. This is expected. 
", }, ) @@ -3196,7 +3196,7 @@ class PullRequestLabel(Base): __tablename__ = "pull_request_labels" __table_args__ = ( UniqueConstraint("pr_src_id", "pull_request_id"), - {"schema": "augur_data"}, + {"schema": "collection_data"}, ) pr_label_id = Column( @@ -3258,7 +3258,7 @@ class PullRequestMessageRef(Base): __tablename__ = "pull_request_message_ref" __table_args__ = ( UniqueConstraint("pr_message_ref_src_comment_id", "pull_request_id", name="pull-request-message-ref-insert-unique"), - {"schema": "augur_data"}, + {"schema": "collection_data"}, ) pr_msg_ref_id = Column( @@ -3310,7 +3310,7 @@ class PullRequestMeta(Base): __table_args__ = ( Index("pr_meta-cntrbid-idx", "cntrb_id"), UniqueConstraint("pull_request_id", "pr_head_or_base", 'pr_sha', name="pull-request-meta-insert-unique"), - {"schema": "augur_data", + {"schema": "collection_data", "comment": 'Pull requests contain referencing metadata. There are a few columns that are discrete. There are also head and base designations for the repo on each side of the pull request. 
Similar functions exist in GitLab, though the language here is based on GitHub.'}, ) @@ -3384,7 +3384,7 @@ class PullRequestReviewer(Base): __table_args__ = ( Index("pr-reviewers-cntrb-idx1", "cntrb_id"), UniqueConstraint("pull_request_id", "pr_reviewer_src_id"), - {"schema": "augur_data"}, + {"schema": "collection_data"}, ) pr_reviewer_map_id = Column( @@ -3446,7 +3446,7 @@ class PullRequestReview(Base): __table_args__ = ( UniqueConstraint("pr_review_src_id", name="pr_review_unique"), Index("pr_id_pr_reviews", "pull_request_id"), - {"schema": "augur_data"}, + {"schema": "collection_data"}, ) pr_review_id = Column( @@ -3515,7 +3515,7 @@ class PullRequestReview(Base): class PullRequestTeam(Base): __tablename__ = "pull_request_teams" - __table_args__ = {"schema": "augur_data"} + __table_args__ = {"schema": "collection_data"} pr_team_id = Column( BigInteger, @@ -3557,7 +3557,7 @@ class PullRequestRepo(Base): __tablename__ = "pull_request_repo" __table_args__ = ( Index("pr-cntrb-idx-repo", "pr_cntrb_id"), - {"schema": "augur_data", + {"schema": "collection_data", "comment": "This table is for storing information about forks that exist as part of a pull request. Generally we do not want to track these like ordinary repositories. 
"}, ) @@ -3601,7 +3601,7 @@ class PullRequestReviewMessageRef(Base): __tablename__ = "pull_request_review_message_ref" __table_args__ = ( UniqueConstraint("pr_review_msg_src_id", name="pull-request-review-message-ref-insert-unique"), - {"schema": "augur_data"}, + {"schema": "collection_data"}, ) pr_review_msg_ref_id = Column( @@ -3675,7 +3675,7 @@ class PullRequestReviewMessageRef(Base): class RepoClone(Base): __tablename__ = "repo_clones_data" - __table_args__ = {"schema": "augur_data"} + __table_args__ = {"schema": "collection_data"} repo_clone_data_id = Column( BigInteger, @@ -3704,7 +3704,7 @@ class RepoClone(Base): class TopicModelMeta(Base): __tablename__ = "topic_model_meta" - __table_args__ = {"schema": "augur_data"} + __table_args__ = {"schema": "collection_data"} model_id = Column( UUID(as_uuid=True), @@ -3811,7 +3811,7 @@ class TopicModelEvent(Base): __table_args__ = ( Index("ix_tme_repo_ts", "repo_id", "ts"), Index("ix_tme_event", "event"), - {"schema": "augur_data"} + {"schema": "collection_data"} ) event_id = Column( From e63777183b2c0896675c2e3b344b64d03b649610 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 27 May 2026 15:12:27 -0400 Subject: [PATCH 04/26] schema arguments (but single quotes this time) Signed-off-by: Adrian Edwards --- collectoss/application/db/models/data.py | 104 +++++++++++------------ 1 file changed, 52 insertions(+), 52 deletions(-) diff --git a/collectoss/application/db/models/data.py b/collectoss/application/db/models/data.py index 8d6dd17a5..650c793cb 100644 --- a/collectoss/application/db/models/data.py +++ b/collectoss/application/db/models/data.py @@ -69,7 +69,7 @@ class ChaossMetricStatus(Base): cms_id = Column( BigInteger, - Sequence('chaoss_metric_status_cms_id_seq', start=1, schema='augur_data'), + Sequence('chaoss_metric_status_cms_id_seq', start=1, schema='collection_data'), primary_key=True, server_default=text( "nextval('augur_data.chaoss_metric_status_cms_id_seq'::regclass)" @@ -101,7 +101,7 @@ class 
ChaossUser(Base): chaoss_id = Column( BigInteger, - Sequence('chaoss_user_chaoss_id_seq', start=1, schema='augur_data'), + Sequence('chaoss_user_chaoss_id_seq', start=1, schema='collection_data'), primary_key=True, server_default=text( "nextval('augur_data.chaoss_user_chaoss_id_seq'::regclass)" @@ -128,7 +128,7 @@ class ContributorAffiliation(Base): ca_id = Column( BigInteger, - Sequence('contributor_affiliations_ca_id_seq', start=25430, schema='augur_data'), + Sequence('contributor_affiliations_ca_id_seq', start=25430, schema='collection_data'), primary_key=True, server_default=text( "nextval('augur_data.contributor_affiliations_ca_id_seq'::regclass)" @@ -501,7 +501,7 @@ class LstmAnomalyModel(Base): model_id = Column( BigInteger, - Sequence('lstm_anomaly_models_model_id_seq', start=1, schema='augur_data'), + Sequence('lstm_anomaly_models_model_id_seq', start=1, schema='collection_data'), primary_key=True, server_default=text( "nextval('augur_data.lstm_anomaly_models_model_id_seq'::regclass)" @@ -554,7 +554,7 @@ class RepoGroup(Base): repo_group_id = Column( BigInteger, - Sequence('repo_groups_repo_group_id_seq', start=25430, schema='augur_data'), + Sequence('repo_groups_repo_group_id_seq', start=25430, schema='collection_data'), primary_key=True, server_default=text( "nextval('augur_data.repo_groups_repo_group_id_seq'::regclass)" @@ -644,7 +644,7 @@ class TopicWord(Base): topic_words_id = Column( BigInteger, - Sequence('topic_words_topic_words_id_seq', start=1, schema='augur_data'), + Sequence('topic_words_topic_words_id_seq', start=1, schema='collection_data'), primary_key=True, server_default=text( "nextval('augur_data.topic_words_topic_words_id_seq'::regclass)" @@ -688,7 +688,7 @@ class UnresolvedCommitEmail(Base): email_unresolved_id = Column( BigInteger, - Sequence('unresolved_commit_emails_email_unresolved_id_seq', start=1, schema='augur_data'), + Sequence('unresolved_commit_emails_email_unresolved_id_seq', start=1, schema='collection_data'), 
primary_key=True, server_default=text( "nextval('augur_data.unresolved_commit_emails_email_unresolved_id_seq'::regclass)" @@ -744,7 +744,7 @@ class ContributorRepo(Base): cntrb_repo_id = Column( BigInteger, - Sequence('contributor_repo_cntrb_repo_id_seq', start=1, schema='augur_data'), + Sequence('contributor_repo_cntrb_repo_id_seq', start=1, schema='collection_data'), primary_key=True, server_default=text( "nextval('augur_data.contributor_repo_cntrb_repo_id_seq'::regclass)" @@ -789,7 +789,7 @@ class ContributorsAlias(Base): cntrb_alias_id = Column( BigInteger, - Sequence('contributors_aliases_cntrb_alias_id_seq', start=1, schema='augur_data'), + Sequence('contributors_aliases_cntrb_alias_id_seq', start=1, schema='collection_data'), primary_key=True, server_default=text( "nextval('augur_data.contributors_aliases_cntrb_alias_id_seq'::regclass)" @@ -845,7 +845,7 @@ class Repo(Base): repo_id = Column( BigInteger, - Sequence('repo_repo_id_seq', start=25480, schema='augur_data'), + Sequence('repo_repo_id_seq', start=25480, schema='collection_data'), primary_key=True, server_default=text("nextval('augur_data.repo_repo_id_seq'::regclass)"), ) @@ -1204,7 +1204,7 @@ class RepoTestCoverage(Base): repo_id = Column( ForeignKey("augur_data.repo.repo_id"), - Sequence('repo_test_coverage_repo_id_seq', start=1, schema='augur_data'), + Sequence('repo_test_coverage_repo_id_seq', start=1, schema='collection_data'), primary_key=True, server_default=text( "nextval('augur_data.repo_test_coverage_repo_id_seq'::regclass)" @@ -1237,7 +1237,7 @@ class RepoGroupInsight(Base): rgi_id = Column( BigInteger, - Sequence('repo_group_insights_rgi_id_seq', start=25430, schema='augur_data'), + Sequence('repo_group_insights_rgi_id_seq', start=25430, schema='collection_data'), primary_key=True, server_default=text( "nextval('augur_data.repo_group_insights_rgi_id_seq'::regclass)" @@ -1271,7 +1271,7 @@ class RepoGroupsListServe(Base): rgls_id = Column( BigInteger, - 
Sequence('repo_groups_list_serve_rgls_id_seq', start=25430, schema='augur_data'), + Sequence('repo_groups_list_serve_rgls_id_seq', start=25430, schema='collection_data'), primary_key=True, server_default=text( "nextval('augur_data.repo_groups_list_serve_rgls_id_seq'::regclass)" @@ -1555,7 +1555,7 @@ class LstmAnomalyResult(Base): result_id = Column( BigInteger, - Sequence('lstm_anomaly_results_result_id_seq', start=1, schema='augur_data'), + Sequence('lstm_anomaly_results_result_id_seq', start=1, schema='collection_data'), primary_key=True, server_default=text( "nextval('augur_data.lstm_anomaly_results_result_id_seq'::regclass)" @@ -1667,7 +1667,7 @@ class MessageAnalysisSummary(Base): msg_summary_id = Column( BigInteger, - Sequence('message_analysis_summary_msg_summary_id_seq', start=1, schema='augur_data'), + Sequence('message_analysis_summary_msg_summary_id_seq', start=1, schema='collection_data'), primary_key=True, server_default=text( "nextval('augur_data.message_analysis_summary_msg_summary_id_seq'::regclass)" @@ -1707,7 +1707,7 @@ class MessageSentimentSummary(Base): msg_summary_id = Column( BigInteger, - Sequence('message_sentiment_summary_msg_summary_id_seq', start=1, schema='augur_data'), + Sequence('message_sentiment_summary_msg_summary_id_seq', start=1, schema='collection_data'), primary_key=True, server_default=text( "nextval('augur_data.message_sentiment_summary_msg_summary_id_seq'::regclass)" @@ -1754,7 +1754,7 @@ class PullRequest(Base): pull_request_id = Column( BigInteger, - Sequence('pull_requests_pull_request_id_seq', start=25430, schema='augur_data'), + Sequence('pull_requests_pull_request_id_seq', start=25430, schema='collection_data'), primary_key=True, server_default=text( "nextval('augur_data.pull_requests_pull_request_id_seq'::regclass)" @@ -1922,7 +1922,7 @@ class RepoBadging(Base): badge_collection_id = Column( BigInteger, - Sequence('repo_badging_badge_collection_id_seq', start=25012, schema='augur_data'), + 
Sequence('repo_badging_badge_collection_id_seq', start=25012, schema='collection_data'), primary_key=True, server_default=text( "nextval('augur_data.repo_badging_badge_collection_id_seq'::regclass)" @@ -1964,7 +1964,7 @@ class RepoClusterMessage(Base): msg_cluster_id = Column( BigInteger, - Sequence('repo_cluster_messages_msg_cluster_id_seq', start=1, schema='augur_data'), + Sequence('repo_cluster_messages_msg_cluster_id_seq', start=1, schema='collection_data'), primary_key=True, server_default=text( "nextval('augur_data.repo_cluster_messages_msg_cluster_id_seq'::regclass)" @@ -1995,7 +1995,7 @@ class RepoDependency(Base): repo_dependencies_id = Column( BigInteger, - Sequence('repo_dependencies_repo_dependencies_id_seq', start=1, schema='augur_data'), + Sequence('repo_dependencies_repo_dependencies_id_seq', start=1, schema='collection_data'), primary_key=True, server_default=text( "nextval('augur_data.repo_dependencies_repo_dependencies_id_seq'::regclass)" @@ -2026,7 +2026,7 @@ class RepoDepsLibyear(Base): repo_deps_libyear_id = Column( BigInteger, - Sequence('repo_deps_libyear_repo_deps_libyear_id_seq', start=1, schema='augur_data'), + Sequence('repo_deps_libyear_repo_deps_libyear_id_seq', start=1, schema='collection_data'), primary_key=True, server_default=text( "nextval('augur_data.repo_deps_libyear_repo_deps_libyear_id_seq'::regclass)" @@ -2061,7 +2061,7 @@ class RepoDepsScorecard(Base): repo_deps_scorecard_id = Column( BigInteger, - Sequence('repo_deps_scorecard_repo_deps_scorecard_id_seq1', start=1, schema='augur_data'), + Sequence('repo_deps_scorecard_repo_deps_scorecard_id_seq1', start=1, schema='collection_data'), primary_key=True, server_default=text( "nextval('augur_data.repo_deps_scorecard_repo_deps_scorecard_id_seq1'::regclass)" @@ -2092,7 +2092,7 @@ class RepoInfo(Base): repo_info_id = Column( BigInteger, - Sequence('repo_info_repo_info_id_seq', start=25430, schema='augur_data'), + Sequence('repo_info_repo_info_id_seq', start=25430, 
schema='collection_data'), primary_key=True, server_default=text( "nextval('augur_data.repo_info_repo_info_id_seq'::regclass)" @@ -2183,7 +2183,7 @@ class RepoInsightsRecord(Base): ri_id = Column( BigInteger, - Sequence('repo_insights_records_ri_id_seq', start=1, schema='augur_data'), + Sequence('repo_insights_records_ri_id_seq', start=1, schema='collection_data'), primary_key=True, server_default=text( "nextval('augur_data.repo_insights_records_ri_id_seq'::regclass)" @@ -2230,7 +2230,7 @@ class RepoLabor(Base): repo_labor_id = Column( BigInteger, - Sequence('repo_labor_repo_labor_id_seq', start=25430, schema='augur_data'), + Sequence('repo_labor_repo_labor_id_seq', start=25430, schema='collection_data'), primary_key=True, server_default=text( "nextval('augur_data.repo_labor_repo_labor_id_seq'::regclass)" @@ -2289,7 +2289,7 @@ class RepoSbomScan(Base): rsb_id = Column( BigInteger, - Sequence('repo_sbom_scans_rsb_id_seq', start=25430, schema='augur_data'), + Sequence('repo_sbom_scans_rsb_id_seq', start=25430, schema='collection_data'), primary_key=True, server_default=text( "nextval('augur_data.repo_sbom_scans_rsb_id_seq'::regclass)" @@ -2333,7 +2333,7 @@ class RepoTopic(Base): repo_topic_id = Column( BigInteger, - Sequence('repo_topic_repo_topic_id_seq', start=1, schema='augur_data'), + Sequence('repo_topic_repo_topic_id_seq', start=1, schema='collection_data'), primary_key=True, server_default=text( "nextval('augur_data.repo_topic_repo_topic_id_seq'::regclass)" @@ -2361,7 +2361,7 @@ class CommitCommentRef(Base): cmt_comment_id = Column( BigInteger, - Sequence('commit_comment_ref_cmt_comment_id_seq', start=25430, schema='augur_data'), + Sequence('commit_comment_ref_cmt_comment_id_seq', start=25430, schema='collection_data'), primary_key=True, server_default=text( "nextval('augur_data.commit_comment_ref_cmt_comment_id_seq'::regclass)" @@ -2423,7 +2423,7 @@ class CommitParent(Base): ) parent_id = Column( ForeignKey("augur_data.commits.cmt_id"), - 
Sequence('commit_parents_parent_id_seq', start=25430, schema='augur_data'), + Sequence('commit_parents_parent_id_seq', start=25430, schema='collection_data'), primary_key=True, nullable=False, server_default=text( @@ -2452,7 +2452,7 @@ class DiscourseInsight(Base): msg_discourse_id = Column( BigInteger, - Sequence('discourse_insights_msg_discourse_id_seq1', start=1, schema='augur_data'), + Sequence('discourse_insights_msg_discourse_id_seq1', start=1, schema='collection_data'), primary_key=True, server_default=text( "nextval('augur_data.discourse_insights_msg_discourse_id_seq1'::regclass)" @@ -2480,7 +2480,7 @@ class IssueAssignee(Base): issue_assignee_id = Column( BigInteger, - Sequence('issue_assignees_issue_assignee_id_seq', start=1, schema='augur_data'), + Sequence('issue_assignees_issue_assignee_id_seq', start=1, schema='collection_data'), primary_key=True, server_default=text( "nextval('augur_data.issue_assignees_issue_assignee_id_seq'::regclass)" @@ -2540,7 +2540,7 @@ class IssueEvent(Base): event_id = Column( BigInteger, - Sequence('issue_events_event_id_seq', start=25430, schema='augur_data'), + Sequence('issue_events_event_id_seq', start=25430, schema='collection_data'), primary_key=True, server_default=text( "nextval('augur_data.issue_events_event_id_seq'::regclass)" @@ -2625,7 +2625,7 @@ class IssueLabel(Base): issue_label_id = Column( BigInteger, - Sequence('issue_labels_issue_label_id_seq', start=25430, schema='augur_data'), + Sequence('issue_labels_issue_label_id_seq', start=25430, schema='collection_data'), primary_key=True, server_default=text( "nextval('augur_data.issue_labels_issue_label_id_seq'::regclass)" @@ -2682,7 +2682,7 @@ class IssueMessageRef(Base): issue_msg_ref_id = Column( BigInteger, - Sequence('issue_message_ref_issue_msg_ref_id_seq', start=25430, schema='augur_data'), + Sequence('issue_message_ref_issue_msg_ref_id_seq', start=25430, schema='collection_data'), primary_key=True, server_default=text( 
"nextval('augur_data.issue_message_ref_issue_msg_ref_id_seq'::regclass)" @@ -2744,7 +2744,7 @@ class LibraryDependency(Base): lib_dependency_id = Column( BigInteger, - Sequence('library_dependencies_lib_dependency_id_seq', start=25430, schema='augur_data'), + Sequence('library_dependencies_lib_dependency_id_seq', start=25430, schema='collection_data'), primary_key=True, server_default=text( "nextval('augur_data.library_dependencies_lib_dependency_id_seq'::regclass)" @@ -2771,7 +2771,7 @@ class LibraryVersion(Base): library_version_id = Column( BigInteger, - Sequence('library_version_library_version_id_seq', start=25430, schema='augur_data'), + Sequence('library_version_library_version_id_seq', start=25430, schema='collection_data'), primary_key=True, server_default=text( "nextval('augur_data.library_version_library_version_id_seq'::regclass)" @@ -2797,7 +2797,7 @@ class MessageAnalysis(Base): msg_analysis_id = Column( BigInteger, - Sequence('message_analysis_msg_analysis_id_seq', start=1, schema='augur_data'), + Sequence('message_analysis_msg_analysis_id_seq', start=1, schema='collection_data'), primary_key=True, server_default=text( "nextval('augur_data.message_analysis_msg_analysis_id_seq'::regclass)" @@ -2840,7 +2840,7 @@ class MessageSentiment(Base): msg_analysis_id = Column( BigInteger, - Sequence('message_sentiment_msg_analysis_id_seq', start=1, schema='augur_data'), + Sequence('message_sentiment_msg_analysis_id_seq', start=1, schema='collection_data'), primary_key=True, server_default=text( "nextval('augur_data.message_sentiment_msg_analysis_id_seq'::regclass)" @@ -2881,7 +2881,7 @@ class PullRequestAnalysis(Base): pull_request_analysis_id = Column( BigInteger, - Sequence('pull_request_analysis_pull_request_analysis_id_seq', start=1, schema='augur_data'), + Sequence('pull_request_analysis_pull_request_analysis_id_seq', start=1, schema='collection_data'), primary_key=True, server_default=text( 
"nextval('augur_data.pull_request_analysis_pull_request_analysis_id_seq'::regclass)" @@ -2931,7 +2931,7 @@ class PullRequestAssignee(Base): pr_assignee_map_id = Column( BigInteger, - Sequence('pull_request_assignees_pr_assignee_map_id_seq', start=25430, schema='augur_data'), + Sequence('pull_request_assignees_pr_assignee_map_id_seq', start=25430, schema='collection_data'), primary_key=True, server_default=text( "nextval('augur_data.pull_request_assignees_pr_assignee_map_id_seq'::regclass)" @@ -2994,7 +2994,7 @@ class PullRequestCommit(Base): pr_cmt_id = Column( BigInteger, - Sequence('pull_request_commits_pr_cmt_id_seq', start=1, schema='augur_data'), + Sequence('pull_request_commits_pr_cmt_id_seq', start=1, schema='collection_data'), primary_key=True, server_default=text( "nextval('augur_data.pull_request_commits_pr_cmt_id_seq'::regclass)" @@ -3049,7 +3049,7 @@ class PullRequestEvent(Base): pr_event_id = Column( BigInteger, - Sequence('pull_request_events_pr_event_id_seq', start=25430, schema='augur_data'), + Sequence('pull_request_events_pr_event_id_seq', start=25430, schema='collection_data'), primary_key=True, server_default=text( "nextval('augur_data.pull_request_events_pr_event_id_seq'::regclass)" @@ -3149,7 +3149,7 @@ class PullRequestFile(Base): pr_file_id = Column( BigInteger, - Sequence('pull_request_files_pr_file_id_seq', start=25150, schema='augur_data'), + Sequence('pull_request_files_pr_file_id_seq', start=25150, schema='collection_data'), primary_key=True, server_default=text( "nextval('augur_data.pull_request_files_pr_file_id_seq'::regclass)" @@ -3201,7 +3201,7 @@ class PullRequestLabel(Base): pr_label_id = Column( BigInteger, - Sequence('pull_request_labels_pr_label_id_seq', start=25430, schema='augur_data'), + Sequence('pull_request_labels_pr_label_id_seq', start=25430, schema='collection_data'), primary_key=True, server_default=text( "nextval('augur_data.pull_request_labels_pr_label_id_seq'::regclass)" @@ -3263,7 +3263,7 @@ class 
PullRequestMessageRef(Base): pr_msg_ref_id = Column( BigInteger, - Sequence('pull_request_message_ref_pr_msg_ref_id_seq', start=25430, schema='augur_data'), + Sequence('pull_request_message_ref_pr_msg_ref_id_seq', start=25430, schema='collection_data'), primary_key=True, server_default=text( "nextval('augur_data.pull_request_message_ref_pr_msg_ref_id_seq'::regclass)" @@ -3316,7 +3316,7 @@ class PullRequestMeta(Base): pr_repo_meta_id = Column( BigInteger, - Sequence('pull_request_meta_pr_repo_meta_id_seq', start=25430, schema='augur_data'), + Sequence('pull_request_meta_pr_repo_meta_id_seq', start=25430, schema='collection_data'), primary_key=True, server_default=text( "nextval('augur_data.pull_request_meta_pr_repo_meta_id_seq'::regclass)" @@ -3389,7 +3389,7 @@ class PullRequestReviewer(Base): pr_reviewer_map_id = Column( BigInteger, - Sequence('pull_request_reviewers_pr_reviewer_map_id_seq', start=25430, schema='augur_data'), + Sequence('pull_request_reviewers_pr_reviewer_map_id_seq', start=25430, schema='collection_data'), primary_key=True, server_default=text( "nextval('augur_data.pull_request_reviewers_pr_reviewer_map_id_seq'::regclass)" @@ -3451,7 +3451,7 @@ class PullRequestReview(Base): pr_review_id = Column( BigInteger, - Sequence('pull_request_reviews_pr_review_id_seq', start=1, schema='augur_data'), + Sequence('pull_request_reviews_pr_review_id_seq', start=1, schema='collection_data'), primary_key=True, server_default=text( "nextval('augur_data.pull_request_reviews_pr_review_id_seq'::regclass)" @@ -3519,7 +3519,7 @@ class PullRequestTeam(Base): pr_team_id = Column( BigInteger, - Sequence('pull_request_teams_pr_team_id_seq', start=25430, schema='augur_data'), + Sequence('pull_request_teams_pr_team_id_seq', start=25430, schema='collection_data'), primary_key=True, server_default=text( "nextval('augur_data.pull_request_teams_pr_team_id_seq'::regclass)" @@ -3563,7 +3563,7 @@ class PullRequestRepo(Base): pr_repo_id = Column( BigInteger, - 
Sequence('pull_request_repo_pr_repo_id_seq', start=25430, schema='augur_data'), + Sequence('pull_request_repo_pr_repo_id_seq', start=25430, schema='collection_data'), primary_key=True, server_default=text( "nextval('augur_data.pull_request_repo_pr_repo_id_seq'::regclass)" @@ -3606,7 +3606,7 @@ class PullRequestReviewMessageRef(Base): pr_review_msg_ref_id = Column( BigInteger, - Sequence('pull_request_review_message_ref_pr_review_msg_ref_id_seq', start=1, schema='augur_data'), + Sequence('pull_request_review_message_ref_pr_review_msg_ref_id_seq', start=1, schema='collection_data'), primary_key=True, server_default=text( "nextval('augur_data.pull_request_review_message_ref_pr_review_msg_ref_id_seq'::regclass)" @@ -3679,7 +3679,7 @@ class RepoClone(Base): repo_clone_data_id = Column( BigInteger, - Sequence('repo_clones_data_id_seq', start=1, schema='augur_data'), + Sequence('repo_clones_data_id_seq', start=1, schema='collection_data'), primary_key=True, server_default=text( "nextval('augur_data.repo_clones_data_id_seq'::regclass)" From bf798eae286dceeab8f5c1c90618a36401558da7 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 27 May 2026 15:14:19 -0400 Subject: [PATCH 05/26] rename all references (foreign keys, sequences etc) within SQL Signed-off-by: Adrian Edwards --- .../application/db/models/augur_operations.py | 6 +- collectoss/application/db/models/data.py | 322 +++++++++--------- 2 files changed, 164 insertions(+), 164 deletions(-) diff --git a/collectoss/application/db/models/augur_operations.py b/collectoss/application/db/models/augur_operations.py index 5f25a92f4..68aaac06d 100644 --- a/collectoss/application/db/models/augur_operations.py +++ b/collectoss/application/db/models/augur_operations.py @@ -227,7 +227,7 @@ class BadgingDEI(Base): level = Column(String, nullable=False) repo_id = Column( - ForeignKey("augur_data.repo.repo_id", name="user_repo_user_id_fkey"), primary_key=True, nullable=False + ForeignKey("collection_data.repo.repo_id", 
name="user_repo_user_id_fkey"), primary_key=True, nullable=False ) repo = relationship("Repo") @@ -749,7 +749,7 @@ class UserRepo(Base): ForeignKey("augur_operations.user_groups.group_id", name="user_repo_group_id_fkey"), primary_key=True, nullable=False ) repo_id = Column( - ForeignKey("augur_data.repo.repo_id", name="user_repo_user_id_fkey"), primary_key=True, nullable=False + ForeignKey("collection_data.repo.repo_id", name="user_repo_user_id_fkey"), primary_key=True, nullable=False ) repo = relationship("Repo", back_populates="user_repo") @@ -1204,7 +1204,7 @@ class CollectionStatus(Base): {"schema": "augur_operations"} ) - repo_id = Column(ForeignKey("augur_data.repo.repo_id", name="collection_status_repo_id_fk"), primary_key=True) + repo_id = Column(ForeignKey("collection_data.repo.repo_id", name="collection_status_repo_id_fk"), primary_key=True) core_data_last_collected = Column(TIMESTAMP) core_status = Column(String, nullable=False, server_default=text("'Pending'")) core_task_id = Column(String) diff --git a/collectoss/application/db/models/data.py b/collectoss/application/db/models/data.py index 650c793cb..4b8d7e5a9 100644 --- a/collectoss/application/db/models/data.py +++ b/collectoss/application/db/models/data.py @@ -72,7 +72,7 @@ class ChaossMetricStatus(Base): Sequence('chaoss_metric_status_cms_id_seq', start=1, schema='collection_data'), primary_key=True, server_default=text( - "nextval('augur_data.chaoss_metric_status_cms_id_seq'::regclass)" + "nextval('collection_data.chaoss_metric_status_cms_id_seq'::regclass)" ), ) cm_group = Column(String) @@ -104,7 +104,7 @@ class ChaossUser(Base): Sequence('chaoss_user_chaoss_id_seq', start=1, schema='collection_data'), primary_key=True, server_default=text( - "nextval('augur_data.chaoss_user_chaoss_id_seq'::regclass)" + "nextval('collection_data.chaoss_user_chaoss_id_seq'::regclass)" ), ) chaoss_login_name = Column(String) @@ -131,7 +131,7 @@ class ContributorAffiliation(Base): 
Sequence('contributor_affiliations_ca_id_seq', start=25430, schema='collection_data'), primary_key=True, server_default=text( - "nextval('augur_data.contributor_affiliations_ca_id_seq'::regclass)" + "nextval('collection_data.contributor_affiliations_ca_id_seq'::regclass)" ), ) ca_domain = Column(String(64), nullable=False, unique=True) @@ -504,7 +504,7 @@ class LstmAnomalyModel(Base): Sequence('lstm_anomaly_models_model_id_seq', start=1, schema='collection_data'), primary_key=True, server_default=text( - "nextval('augur_data.lstm_anomaly_models_model_id_seq'::regclass)" + "nextval('collection_data.lstm_anomaly_models_model_id_seq'::regclass)" ), ) model_name = Column(String) @@ -532,7 +532,7 @@ class Platform(Base): BigInteger, Sequence('platform_pltfrm_id_seq', start=25430, schema="collection_data"), primary_key=True, - server_default=text("nextval('augur_data.platform_pltfrm_id_seq'::regclass)"), + server_default=text("nextval('collection_data.platform_pltfrm_id_seq'::regclass)"), ) pltfrm_name = Column(String) pltfrm_version = Column(String) @@ -557,7 +557,7 @@ class RepoGroup(Base): Sequence('repo_groups_repo_group_id_seq', start=25430, schema='collection_data'), primary_key=True, server_default=text( - "nextval('augur_data.repo_groups_repo_group_id_seq'::regclass)" + "nextval('collection_data.repo_groups_repo_group_id_seq'::regclass)" ), ) rg_name = Column(String, nullable=False) @@ -647,7 +647,7 @@ class TopicWord(Base): Sequence('topic_words_topic_words_id_seq', start=1, schema='collection_data'), primary_key=True, server_default=text( - "nextval('augur_data.topic_words_topic_words_id_seq'::regclass)" + "nextval('collection_data.topic_words_topic_words_id_seq'::regclass)" ), ) topic_id = Column(BigInteger) @@ -691,7 +691,7 @@ class UnresolvedCommitEmail(Base): Sequence('unresolved_commit_emails_email_unresolved_id_seq', start=1, schema='collection_data'), primary_key=True, server_default=text( - 
"nextval('augur_data.unresolved_commit_emails_email_unresolved_id_seq'::regclass)" + "nextval('collection_data.unresolved_commit_emails_email_unresolved_id_seq'::regclass)" ), ) email = Column(String, nullable=False, unique=True) @@ -712,7 +712,7 @@ class UtilityLog(Base): BigInteger, Sequence('utility_log_id_seq1', start=1, schema="collection_data"), primary_key=True, - server_default=text("nextval('augur_data.utility_log_id_seq1'::regclass)"), + server_default=text("nextval('collection_data.utility_log_id_seq1'::regclass)"), ) level = Column(String(8), nullable=False) status = Column(String, nullable=False) @@ -747,12 +747,12 @@ class ContributorRepo(Base): Sequence('contributor_repo_cntrb_repo_id_seq', start=1, schema='collection_data'), primary_key=True, server_default=text( - "nextval('augur_data.contributor_repo_cntrb_repo_id_seq'::regclass)" + "nextval('collection_data.contributor_repo_cntrb_repo_id_seq'::regclass)" ), ) cntrb_id = Column( ForeignKey( - "augur_data.contributors.cntrb_id", ondelete="RESTRICT", onupdate="CASCADE" + "collection_data.contributors.cntrb_id", ondelete="RESTRICT", onupdate="CASCADE" ), nullable=False, comment="This is not null because what is the point without the contributor in this table? 
", @@ -792,12 +792,12 @@ class ContributorsAlias(Base): Sequence('contributors_aliases_cntrb_alias_id_seq', start=1, schema='collection_data'), primary_key=True, server_default=text( - "nextval('augur_data.contributors_aliases_cntrb_alias_id_seq'::regclass)" + "nextval('collection_data.contributors_aliases_cntrb_alias_id_seq'::regclass)" ), ) cntrb_id = Column( ForeignKey( - "augur_data.contributors.cntrb_id", + "collection_data.contributors.cntrb_id", ondelete="CASCADE", onupdate="CASCADE", deferrable=True, @@ -847,10 +847,10 @@ class Repo(Base): BigInteger, Sequence('repo_repo_id_seq', start=25480, schema='collection_data'), primary_key=True, - server_default=text("nextval('augur_data.repo_repo_id_seq'::regclass)"), + server_default=text("nextval('collection_data.repo_repo_id_seq'::regclass)"), ) repo_group_id = Column( - ForeignKey("augur_data.repo_groups.repo_group_id"), nullable=False + ForeignKey("collection_data.repo_groups.repo_group_id"), nullable=False ) repo_git = Column(String, nullable=False) @@ -1194,7 +1194,7 @@ class HistoricalRepoURLs(Base): __tablename__ = "historical_repo_urls" __table_args__ = {"schema": "collection_data"} - repo_id = Column(ForeignKey("augur_data.repo.repo_id"), primary_key=True) + repo_id = Column(ForeignKey("collection_data.repo.repo_id"), primary_key=True) git_url = Column(String, primary_key=True) date_collected = Column(DateTime(timezone=True), server_default=func.now(), nullable=True) @@ -1203,11 +1203,11 @@ class RepoTestCoverage(Base): __table_args__ = {"schema": "collection_data"} repo_id = Column( - ForeignKey("augur_data.repo.repo_id"), + ForeignKey("collection_data.repo.repo_id"), Sequence('repo_test_coverage_repo_id_seq', start=1, schema='collection_data'), primary_key=True, server_default=text( - "nextval('augur_data.repo_test_coverage_repo_id_seq'::regclass)" + "nextval('collection_data.repo_test_coverage_repo_id_seq'::regclass)" ), ) repo_clone_date = Column(TIMESTAMP(precision=0)) @@ -1240,10 +1240,10 @@ class 
RepoGroupInsight(Base): Sequence('repo_group_insights_rgi_id_seq', start=25430, schema='collection_data'), primary_key=True, server_default=text( - "nextval('augur_data.repo_group_insights_rgi_id_seq'::regclass)" + "nextval('collection_data.repo_group_insights_rgi_id_seq'::regclass)" ), ) - repo_group_id = Column(ForeignKey("augur_data.repo_groups.repo_group_id")) + repo_group_id = Column(ForeignKey("collection_data.repo_groups.repo_group_id")) rgi_metric = Column(String) rgi_value = Column(String) cms_id = Column(BigInteger) @@ -1274,11 +1274,11 @@ class RepoGroupsListServe(Base): Sequence('repo_groups_list_serve_rgls_id_seq', start=25430, schema='collection_data'), primary_key=True, server_default=text( - "nextval('augur_data.repo_groups_list_serve_rgls_id_seq'::regclass)" + "nextval('collection_data.repo_groups_list_serve_rgls_id_seq'::regclass)" ), ) repo_group_id = Column( - ForeignKey("augur_data.repo_groups.repo_group_id"), nullable=False + ForeignKey("collection_data.repo_groups.repo_group_id"), nullable=False ) rgls_name = Column(String) rgls_description = Column(String(3000)) @@ -1328,10 +1328,10 @@ class Commit(Base): BigInteger, Sequence('commits_cmt_id_seq', start=25430, schema="collection_data"), primary_key=True, - server_default=text("nextval('augur_data.commits_cmt_id_seq'::regclass)"), + server_default=text("nextval('collection_data.commits_cmt_id_seq'::regclass)"), ) repo_id = Column( - ForeignKey("augur_data.repo.repo_id", ondelete="RESTRICT", onupdate="CASCADE"), + ForeignKey("collection_data.repo.repo_id", ondelete="RESTRICT", onupdate="CASCADE"), nullable=False, ) cmt_commit_hash = Column(String(80), nullable=False) @@ -1355,7 +1355,7 @@ class Commit(Base): cmt_filename = Column(String, nullable=False) cmt_date_attempted = Column(TIMESTAMP(precision=0), nullable=False) cmt_ght_author_id = Column(ForeignKey( - "augur_data.contributors.cntrb_id", + "collection_data.contributors.cntrb_id", name="cmt_ght_author_cntrb_id_fk", onupdate="CASCADE", 
ondelete="RESTRICT", @@ -1368,7 +1368,7 @@ class Commit(Base): cmt_author_timestamp = Column(TIMESTAMP(True, 0)) cmt_author_platform_username = Column( ForeignKey( - "augur_data.contributors.cntrb_login", + "collection_data.contributors.cntrb_login", name="fk_commits_contributors_3", ondelete="CASCADE", onupdate="CASCADE", @@ -1376,7 +1376,7 @@ class Commit(Base): deferrable=True, ), ForeignKey( - "augur_data.contributors.cntrb_login", + "collection_data.contributors.cntrb_login", name="fk_commits_contributors_4", ondelete="CASCADE", onupdate="CASCADE", @@ -1413,11 +1413,11 @@ class CommitMessage(Base): BigInteger, Sequence('commits_cmt_id_seq', start=25430, schema="collection_data"), primary_key=True, - server_default=text("nextval('augur_data.commits_cmt_id_seq'::regclass)"), + server_default=text("nextval('collection_data.commits_cmt_id_seq'::regclass)"), ) repo_id = Column( - ForeignKey("augur_data.repo.repo_id", ondelete="RESTRICT", onupdate="CASCADE"), + ForeignKey("collection_data.repo.repo_id", ondelete="RESTRICT", onupdate="CASCADE"), nullable=False, ) @@ -1449,13 +1449,13 @@ class Issue(Base): BigInteger, Sequence('issue_seq', start=31000, schema="collection_data"), primary_key=True, - server_default=text("nextval('augur_data.issue_seq'::regclass)"), + server_default=text("nextval('collection_data.issue_seq'::regclass)"), ) repo_id = Column( - ForeignKey("augur_data.repo.repo_id", ondelete="CASCADE", onupdate="CASCADE"), + ForeignKey("collection_data.repo.repo_id", ondelete="CASCADE", onupdate="CASCADE"), ) reporter_id = Column( - ForeignKey("augur_data.contributors.cntrb_id"), + ForeignKey("collection_data.contributors.cntrb_id"), comment="The ID of the person who opened the issue. 
", ) pull_request = Column(BigInteger) @@ -1464,7 +1464,7 @@ class Issue(Base): issue_title = Column(String) issue_body = Column(String) cntrb_id = Column( - ForeignKey("augur_data.contributors.cntrb_id"), + ForeignKey("collection_data.contributors.cntrb_id"), comment="The ID of the person who closed the issue. ", ) comment_count = Column(BigInteger) @@ -1515,9 +1515,9 @@ class Library(Base): BigInteger, Sequence('libraries_library_id_seq', start=25430, schema="collection_data"), primary_key=True, - server_default=text("nextval('augur_data.libraries_library_id_seq'::regclass)"), + server_default=text("nextval('collection_data.libraries_library_id_seq'::regclass)"), ) - repo_id = Column(ForeignKey("augur_data.repo.repo_id")) + repo_id = Column(ForeignKey("collection_data.repo.repo_id")) platform = Column(String) name = Column(String) created_timestamp = Column( @@ -1558,12 +1558,12 @@ class LstmAnomalyResult(Base): Sequence('lstm_anomaly_results_result_id_seq', start=1, schema='collection_data'), primary_key=True, server_default=text( - "nextval('augur_data.lstm_anomaly_results_result_id_seq'::regclass)" + "nextval('collection_data.lstm_anomaly_results_result_id_seq'::regclass)" ), ) - repo_id = Column(ForeignKey("augur_data.repo.repo_id")) + repo_id = Column(ForeignKey("collection_data.repo.repo_id")) repo_category = Column(String) - model_id = Column(ForeignKey("augur_data.lstm_anomaly_models.model_id")) + model_id = Column(ForeignKey("collection_data.lstm_anomaly_models.model_id")) metric = Column(String) contamination_factor = Column(Float(53)) mean_absolute_error = Column(Float(53)) @@ -1599,11 +1599,11 @@ class Message(Base): BigInteger, Sequence('message_msg_id_seq', start=25430, schema="collection_data"), primary_key=True, - server_default=text("nextval('augur_data.message_msg_id_seq'::regclass)"), + server_default=text("nextval('collection_data.message_msg_id_seq'::regclass)"), ) rgls_id = Column( ForeignKey( - "augur_data.repo_groups_list_serve.rgls_id", + 
"collection_data.repo_groups_list_serve.rgls_id", ondelete="CASCADE", onupdate="CASCADE", ) @@ -1612,7 +1612,7 @@ class Message(Base): platform_node_id = Column(String) repo_id = Column( ForeignKey( - "augur_data.repo.repo_id", + "collection_data.repo.repo_id", ondelete="CASCADE", onupdate="CASCADE", deferrable=True, @@ -1621,7 +1621,7 @@ class Message(Base): ) cntrb_id = Column( ForeignKey( - "augur_data.contributors.cntrb_id", ondelete="CASCADE", onupdate="CASCADE" + "collection_data.contributors.cntrb_id", ondelete="CASCADE", onupdate="CASCADE" ), comment="Not populated for mailing lists. Populated for GitHub issues. ", ) @@ -1631,7 +1631,7 @@ class Message(Base): msg_header = Column(String) pltfrm_id = Column( ForeignKey( - "augur_data.platform.pltfrm_id", ondelete="CASCADE", onupdate="CASCADE" + "collection_data.platform.pltfrm_id", ondelete="CASCADE", onupdate="CASCADE" ), nullable=False, ) @@ -1670,10 +1670,10 @@ class MessageAnalysisSummary(Base): Sequence('message_analysis_summary_msg_summary_id_seq', start=1, schema='collection_data'), primary_key=True, server_default=text( - "nextval('augur_data.message_analysis_summary_msg_summary_id_seq'::regclass)" + "nextval('collection_data.message_analysis_summary_msg_summary_id_seq'::regclass)" ), ) - repo_id = Column(ForeignKey("augur_data.repo.repo_id")) + repo_id = Column(ForeignKey("collection_data.repo.repo_id")) worker_run_id = Column( BigInteger, comment='This value should reflect the worker_run_id for the messages summarized in the table. There is not a relation between these two tables for that purpose because its not *really*, relationaly a concept unless we create a third table for "worker_run_id", which we determined was unnecessarily complex. 
', @@ -1710,10 +1710,10 @@ class MessageSentimentSummary(Base): Sequence('message_sentiment_summary_msg_summary_id_seq', start=1, schema='collection_data'), primary_key=True, server_default=text( - "nextval('augur_data.message_sentiment_summary_msg_summary_id_seq'::regclass)" + "nextval('collection_data.message_sentiment_summary_msg_summary_id_seq'::regclass)" ), ) - repo_id = Column(ForeignKey("augur_data.repo.repo_id")) + repo_id = Column(ForeignKey("collection_data.repo.repo_id")) worker_run_id = Column( BigInteger, comment='This value should reflect the worker_run_id for the messages summarized in the table. There is not a relation between these two tables for that purpose because its not *really*, relationaly a concept unless we create a third table for "worker_run_id", which we determined was unnecessarily complex. ', @@ -1757,11 +1757,11 @@ class PullRequest(Base): Sequence('pull_requests_pull_request_id_seq', start=25430, schema='collection_data'), primary_key=True, server_default=text( - "nextval('augur_data.pull_requests_pull_request_id_seq'::regclass)" + "nextval('collection_data.pull_requests_pull_request_id_seq'::regclass)" ), ) repo_id = Column( - ForeignKey("augur_data.repo.repo_id", ondelete="CASCADE", onupdate="CASCADE"), + ForeignKey("collection_data.repo.repo_id", ondelete="CASCADE", onupdate="CASCADE"), server_default=text("0"), ) pr_url = Column(String) @@ -1784,7 +1784,7 @@ class PullRequest(Base): pr_src_title = Column(String) pr_augur_contributor_id = Column( ForeignKey( - "augur_data.contributors.cntrb_id", ondelete="RESTRICT", onupdate="CASCADE" + "collection_data.contributors.cntrb_id", ondelete="RESTRICT", onupdate="CASCADE" ), comment="This is to link to the contributor record. 
", ) @@ -1889,9 +1889,9 @@ class Release(Base): CHAR(256), Sequence('releases_release_id_seq', start=1, schema="collection_data"), primary_key=True, - server_default=text("nextval('augur_data.releases_release_id_seq'::regclass)"), + server_default=text("nextval('collection_data.releases_release_id_seq'::regclass)"), ) - repo_id = Column(ForeignKey("augur_data.repo.repo_id"), nullable=False) + repo_id = Column(ForeignKey("collection_data.repo.repo_id"), nullable=False) release_name = Column(String) release_description = Column(String) release_author = Column(String) @@ -1925,10 +1925,10 @@ class RepoBadging(Base): Sequence('repo_badging_badge_collection_id_seq', start=25012, schema='collection_data'), primary_key=True, server_default=text( - "nextval('augur_data.repo_badging_badge_collection_id_seq'::regclass)" + "nextval('collection_data.repo_badging_badge_collection_id_seq'::regclass)" ), ) - repo_id = Column(ForeignKey("augur_data.repo.repo_id")) + repo_id = Column(ForeignKey("collection_data.repo.repo_id")) created_at = Column( TIMESTAMP(precision=0), server_default=text("CURRENT_TIMESTAMP") ) @@ -1967,10 +1967,10 @@ class RepoClusterMessage(Base): Sequence('repo_cluster_messages_msg_cluster_id_seq', start=1, schema='collection_data'), primary_key=True, server_default=text( - "nextval('augur_data.repo_cluster_messages_msg_cluster_id_seq'::regclass)" + "nextval('collection_data.repo_cluster_messages_msg_cluster_id_seq'::regclass)" ), ) - repo_id = Column(ForeignKey("augur_data.repo.repo_id")) + repo_id = Column(ForeignKey("collection_data.repo.repo_id")) cluster_content = Column(Integer) cluster_mechanism = Column(Integer) tool_source = Column(String) @@ -1998,11 +1998,11 @@ class RepoDependency(Base): Sequence('repo_dependencies_repo_dependencies_id_seq', start=1, schema='collection_data'), primary_key=True, server_default=text( - "nextval('augur_data.repo_dependencies_repo_dependencies_id_seq'::regclass)" + 
"nextval('collection_data.repo_dependencies_repo_dependencies_id_seq'::regclass)" ), ) repo_id = Column( - ForeignKey("augur_data.repo.repo_id"), comment="Forign key for repo id. " + ForeignKey("collection_data.repo.repo_id"), comment="Forign key for repo id. " ) dep_name = Column(String, comment="Name of the dependancy found in project. ") dep_count = Column(Integer, comment="Number of times the dependancy was found. ") @@ -2029,10 +2029,10 @@ class RepoDepsLibyear(Base): Sequence('repo_deps_libyear_repo_deps_libyear_id_seq', start=1, schema='collection_data'), primary_key=True, server_default=text( - "nextval('augur_data.repo_deps_libyear_repo_deps_libyear_id_seq'::regclass)" + "nextval('collection_data.repo_deps_libyear_repo_deps_libyear_id_seq'::regclass)" ), ) - repo_id = Column(ForeignKey("augur_data.repo.repo_id")) + repo_id = Column(ForeignKey("collection_data.repo.repo_id")) name = Column(String) requirement = Column(String) type = Column(String) @@ -2064,10 +2064,10 @@ class RepoDepsScorecard(Base): Sequence('repo_deps_scorecard_repo_deps_scorecard_id_seq1', start=1, schema='collection_data'), primary_key=True, server_default=text( - "nextval('augur_data.repo_deps_scorecard_repo_deps_scorecard_id_seq1'::regclass)" + "nextval('collection_data.repo_deps_scorecard_repo_deps_scorecard_id_seq1'::regclass)" ), ) - repo_id = Column(ForeignKey("augur_data.repo.repo_id")) + repo_id = Column(ForeignKey("collection_data.repo.repo_id")) name = Column(String) #status = Column(String) scorecard_check_details = Column(JSONB) @@ -2095,10 +2095,10 @@ class RepoInfo(Base): Sequence('repo_info_repo_info_id_seq', start=25430, schema='collection_data'), primary_key=True, server_default=text( - "nextval('augur_data.repo_info_repo_info_id_seq'::regclass)" + "nextval('collection_data.repo_info_repo_info_id_seq'::regclass)" ), ) - repo_id = Column(ForeignKey("augur_data.repo.repo_id"), nullable=False) + repo_id = Column(ForeignKey("collection_data.repo.repo_id"), nullable=False) 
last_updated = Column( TIMESTAMP(precision=0), server_default=text("NULL::timestamp without time zone") ) @@ -2151,9 +2151,9 @@ class RepoInsight(Base): BigInteger, Sequence('repo_insights_ri_id_seq', start=25430, schema="collection_data"), primary_key=True, - server_default=text("nextval('augur_data.repo_insights_ri_id_seq'::regclass)"), + server_default=text("nextval('collection_data.repo_insights_ri_id_seq'::regclass)"), ) - repo_id = Column(ForeignKey("augur_data.repo.repo_id")) + repo_id = Column(ForeignKey("collection_data.repo.repo_id")) ri_metric = Column(String) ri_value = Column(String) ri_date = Column(TIMESTAMP(precision=0)) @@ -2186,12 +2186,12 @@ class RepoInsightsRecord(Base): Sequence('repo_insights_records_ri_id_seq', start=1, schema='collection_data'), primary_key=True, server_default=text( - "nextval('augur_data.repo_insights_records_ri_id_seq'::regclass)" + "nextval('collection_data.repo_insights_records_ri_id_seq'::regclass)" ), comment="Primary key. ", ) repo_id = Column( - ForeignKey("augur_data.repo.repo_id", ondelete="SET NULL", onupdate="CASCADE"), + ForeignKey("collection_data.repo.repo_id", ondelete="SET NULL", onupdate="CASCADE"), comment="Refers to repo table primary key. 
Will have a foreign key", ) ri_metric = Column(String, comment="The metric endpoint") @@ -2233,10 +2233,10 @@ class RepoLabor(Base): Sequence('repo_labor_repo_labor_id_seq', start=25430, schema='collection_data'), primary_key=True, server_default=text( - "nextval('augur_data.repo_labor_repo_labor_id_seq'::regclass)" + "nextval('collection_data.repo_labor_repo_labor_id_seq'::regclass)" ), ) - repo_id = Column(ForeignKey("augur_data.repo.repo_id")) + repo_id = Column(ForeignKey("collection_data.repo.repo_id")) repo_clone_date = Column(TIMESTAMP(precision=0)) rl_analysis_date = Column(TIMESTAMP(precision=0)) programming_language = Column(String) @@ -2264,14 +2264,14 @@ class RepoMeta(Base): __table_args__ = {"schema": "collection_data", "comment": "Project Languages"} repo_id = Column( - ForeignKey("augur_data.repo.repo_id"), primary_key=True, nullable=False + ForeignKey("collection_data.repo.repo_id"), primary_key=True, nullable=False ) rmeta_id = Column( BigInteger, Sequence('repo_meta_rmeta_id_seq', start=25430, schema="collection_data"), primary_key=True, nullable=False, - server_default=text("nextval('augur_data.repo_meta_rmeta_id_seq'::regclass)"), + server_default=text("nextval('collection_data.repo_meta_rmeta_id_seq'::regclass)"), ) rmeta_name = Column(String) rmeta_value = Column(String, server_default=text("0")) @@ -2292,11 +2292,11 @@ class RepoSbomScan(Base): Sequence('repo_sbom_scans_rsb_id_seq', start=25430, schema='collection_data'), primary_key=True, server_default=text( - "nextval('augur_data.repo_sbom_scans_rsb_id_seq'::regclass)" + "nextval('collection_data.repo_sbom_scans_rsb_id_seq'::regclass)" ), ) repo_id = Column( - ForeignKey("augur_data.repo.repo_id", ondelete="CASCADE", onupdate="CASCADE") + ForeignKey("collection_data.repo.repo_id", ondelete="CASCADE", onupdate="CASCADE") ) sbom_scan = Column(JSON) @@ -2308,14 +2308,14 @@ class RepoStat(Base): __table_args__ = {"schema": "collection_data", "comment": "Project Watchers"} repo_id = Column( - 
ForeignKey("augur_data.repo.repo_id"), primary_key=True, nullable=False + ForeignKey("collection_data.repo.repo_id"), primary_key=True, nullable=False ) rstat_id = Column( BigInteger, Sequence('repo_stats_rstat_id_seq', start=25430, schema="collection_data"), primary_key=True, nullable=False, - server_default=text("nextval('augur_data.repo_stats_rstat_id_seq'::regclass)"), + server_default=text("nextval('collection_data.repo_stats_rstat_id_seq'::regclass)"), ) rstat_name = Column(String(400)) rstat_value = Column(BigInteger) @@ -2336,10 +2336,10 @@ class RepoTopic(Base): Sequence('repo_topic_repo_topic_id_seq', start=1, schema='collection_data'), primary_key=True, server_default=text( - "nextval('augur_data.repo_topic_repo_topic_id_seq'::regclass)" + "nextval('collection_data.repo_topic_repo_topic_id_seq'::regclass)" ), ) - repo_id = Column(ForeignKey("augur_data.repo.repo_id")) + repo_id = Column(ForeignKey("collection_data.repo.repo_id")) topic_id = Column(Integer) topic_prob = Column(Float(53)) tool_source = Column(String) @@ -2364,19 +2364,19 @@ class CommitCommentRef(Base): Sequence('commit_comment_ref_cmt_comment_id_seq', start=25430, schema='collection_data'), primary_key=True, server_default=text( - "nextval('augur_data.commit_comment_ref_cmt_comment_id_seq'::regclass)" + "nextval('collection_data.commit_comment_ref_cmt_comment_id_seq'::regclass)" ), ) cmt_id = Column( ForeignKey( - "augur_data.commits.cmt_id", ondelete="RESTRICT", onupdate="CASCADE" + "collection_data.commits.cmt_id", ondelete="RESTRICT", onupdate="CASCADE" ), nullable=False, ) repo_id = Column(BigInteger) msg_id = Column( ForeignKey( - "augur_data.message.msg_id", ondelete="RESTRICT", onupdate="CASCADE" + "collection_data.message.msg_id", ondelete="RESTRICT", onupdate="CASCADE" ), nullable=False, ) @@ -2417,17 +2417,17 @@ class CommitParent(Base): ) cmt_id = Column( - ForeignKey("augur_data.commits.cmt_id"), + ForeignKey("collection_data.commits.cmt_id"), primary_key=True, nullable=False, 
) parent_id = Column( - ForeignKey("augur_data.commits.cmt_id"), + ForeignKey("collection_data.commits.cmt_id"), Sequence('commit_parents_parent_id_seq', start=25430, schema='collection_data'), primary_key=True, nullable=False, server_default=text( - "nextval('augur_data.commit_parents_parent_id_seq'::regclass)" + "nextval('collection_data.commit_parents_parent_id_seq'::regclass)" ), ) tool_source = Column(String) @@ -2455,10 +2455,10 @@ class DiscourseInsight(Base): Sequence('discourse_insights_msg_discourse_id_seq1', start=1, schema='collection_data'), primary_key=True, server_default=text( - "nextval('augur_data.discourse_insights_msg_discourse_id_seq1'::regclass)" + "nextval('collection_data.discourse_insights_msg_discourse_id_seq1'::regclass)" ), ) - msg_id = Column(ForeignKey("augur_data.message.msg_id")) + msg_id = Column(ForeignKey("collection_data.message.msg_id")) discourse_act = Column(String) tool_source = Column(String) tool_version = Column(String) @@ -2483,14 +2483,14 @@ class IssueAssignee(Base): Sequence('issue_assignees_issue_assignee_id_seq', start=1, schema='collection_data'), primary_key=True, server_default=text( - "nextval('augur_data.issue_assignees_issue_assignee_id_seq'::regclass)" + "nextval('collection_data.issue_assignees_issue_assignee_id_seq'::regclass)" ), ) - issue_id = Column(ForeignKey("augur_data.issues.issue_id")) + issue_id = Column(ForeignKey("collection_data.issues.issue_id")) repo_id = Column( - ForeignKey("augur_data.repo.repo_id", ondelete="RESTRICT", onupdate="CASCADE") + ForeignKey("collection_data.repo.repo_id", ondelete="RESTRICT", onupdate="CASCADE") ) - cntrb_id = Column(ForeignKey("augur_data.contributors.cntrb_id")) + cntrb_id = Column(ForeignKey("collection_data.contributors.cntrb_id")) issue_assignee_src_id = Column( BigInteger, comment="This ID comes from the source. In the case of GitHub, it is the id that is the first field returned from the issue events API in the issue_assignees embedded JSON object. 
We may discover it is an ID for the person themselves; but my hypothesis is that its not.", @@ -2543,21 +2543,21 @@ class IssueEvent(Base): Sequence('issue_events_event_id_seq', start=25430, schema='collection_data'), primary_key=True, server_default=text( - "nextval('augur_data.issue_events_event_id_seq'::regclass)" + "nextval('collection_data.issue_events_event_id_seq'::regclass)" ), ) issue_id = Column( ForeignKey( - "augur_data.issues.issue_id", ondelete="CASCADE", onupdate="CASCADE" + "collection_data.issues.issue_id", ondelete="CASCADE", onupdate="CASCADE" ), nullable=False, ) repo_id = Column( - ForeignKey("augur_data.repo.repo_id", ondelete="RESTRICT", onupdate="CASCADE") + ForeignKey("collection_data.repo.repo_id", ondelete="RESTRICT", onupdate="CASCADE") ) cntrb_id = Column( ForeignKey( - "augur_data.contributors.cntrb_id", ondelete="RESTRICT", onupdate="CASCADE" + "collection_data.contributors.cntrb_id", ondelete="RESTRICT", onupdate="CASCADE" ) ) action = Column(String, nullable=False) @@ -2572,7 +2572,7 @@ class IssueEvent(Base): node_url = Column(String) platform_id = Column( ForeignKey( - "augur_data.platform.pltfrm_id", ondelete="RESTRICT", onupdate="CASCADE" + "collection_data.platform.pltfrm_id", ondelete="RESTRICT", onupdate="CASCADE" ), nullable=False, ) @@ -2628,14 +2628,14 @@ class IssueLabel(Base): Sequence('issue_labels_issue_label_id_seq', start=25430, schema='collection_data'), primary_key=True, server_default=text( - "nextval('augur_data.issue_labels_issue_label_id_seq'::regclass)" + "nextval('collection_data.issue_labels_issue_label_id_seq'::regclass)" ), ) issue_id = Column( - ForeignKey("augur_data.issues.issue_id", ondelete="CASCADE", onupdate="CASCADE") + ForeignKey("collection_data.issues.issue_id", ondelete="CASCADE", onupdate="CASCADE") ) repo_id = Column( - ForeignKey("augur_data.repo.repo_id", ondelete="RESTRICT", onupdate="CASCADE") + ForeignKey("collection_data.repo.repo_id", ondelete="RESTRICT", onupdate="CASCADE") ) 
label_text = Column(String) label_description = Column(String) @@ -2685,12 +2685,12 @@ class IssueMessageRef(Base): Sequence('issue_message_ref_issue_msg_ref_id_seq', start=25430, schema='collection_data'), primary_key=True, server_default=text( - "nextval('augur_data.issue_message_ref_issue_msg_ref_id_seq'::regclass)" + "nextval('collection_data.issue_message_ref_issue_msg_ref_id_seq'::regclass)" ), ) issue_id = Column( ForeignKey( - "augur_data.issues.issue_id", + "collection_data.issues.issue_id", ondelete="CASCADE", onupdate="CASCADE", deferrable=True, @@ -2699,7 +2699,7 @@ class IssueMessageRef(Base): ) repo_id = Column( ForeignKey( - "augur_data.repo.repo_id", + "collection_data.repo.repo_id", ondelete="RESTRICT", onupdate="CASCADE", deferrable=True, @@ -2708,7 +2708,7 @@ class IssueMessageRef(Base): ) msg_id = Column( ForeignKey( - "augur_data.message.msg_id", + "collection_data.message.msg_id", ondelete="RESTRICT", onupdate="CASCADE", deferrable=True, @@ -2747,10 +2747,10 @@ class LibraryDependency(Base): Sequence('library_dependencies_lib_dependency_id_seq', start=25430, schema='collection_data'), primary_key=True, server_default=text( - "nextval('augur_data.library_dependencies_lib_dependency_id_seq'::regclass)" + "nextval('collection_data.library_dependencies_lib_dependency_id_seq'::regclass)" ), ) - library_id = Column(ForeignKey("augur_data.libraries.library_id")) + library_id = Column(ForeignKey("collection_data.libraries.library_id")) manifest_platform = Column(String) manifest_filepath = Column( String(1000), server_default=text("NULL::character varying") @@ -2774,10 +2774,10 @@ class LibraryVersion(Base): Sequence('library_version_library_version_id_seq', start=25430, schema='collection_data'), primary_key=True, server_default=text( - "nextval('augur_data.library_version_library_version_id_seq'::regclass)" + "nextval('collection_data.library_version_library_version_id_seq'::regclass)" ), ) - library_id = 
Column(ForeignKey("augur_data.libraries.library_id")) + library_id = Column(ForeignKey("collection_data.libraries.library_id")) library_platform = Column(String) version_number = Column(String) version_release_date = Column( @@ -2800,10 +2800,10 @@ class MessageAnalysis(Base): Sequence('message_analysis_msg_analysis_id_seq', start=1, schema='collection_data'), primary_key=True, server_default=text( - "nextval('augur_data.message_analysis_msg_analysis_id_seq'::regclass)" + "nextval('collection_data.message_analysis_msg_analysis_id_seq'::regclass)" ), ) - msg_id = Column(ForeignKey("augur_data.message.msg_id")) + msg_id = Column(ForeignKey("collection_data.message.msg_id")) worker_run_id = Column( BigInteger, comment="This column is used to indicate analyses run by a worker during the same execution period, and is useful for grouping, and time series analysis. ", @@ -2843,10 +2843,10 @@ class MessageSentiment(Base): Sequence('message_sentiment_msg_analysis_id_seq', start=1, schema='collection_data'), primary_key=True, server_default=text( - "nextval('augur_data.message_sentiment_msg_analysis_id_seq'::regclass)" + "nextval('collection_data.message_sentiment_msg_analysis_id_seq'::regclass)" ), ) - msg_id = Column(ForeignKey("augur_data.message.msg_id")) + msg_id = Column(ForeignKey("collection_data.message.msg_id")) worker_run_id = Column( BigInteger, comment="This column is used to indicate analyses run by a worker during the same execution period, and is useful for grouping, and time series analysis. 
", @@ -2884,12 +2884,12 @@ class PullRequestAnalysis(Base): Sequence('pull_request_analysis_pull_request_analysis_id_seq', start=1, schema='collection_data'), primary_key=True, server_default=text( - "nextval('augur_data.pull_request_analysis_pull_request_analysis_id_seq'::regclass)" + "nextval('collection_data.pull_request_analysis_pull_request_analysis_id_seq'::regclass)" ), ) pull_request_id = Column( ForeignKey( - "augur_data.pull_requests.pull_request_id", + "collection_data.pull_requests.pull_request_id", ondelete="CASCADE", onupdate="CASCADE", ), @@ -2934,26 +2934,26 @@ class PullRequestAssignee(Base): Sequence('pull_request_assignees_pr_assignee_map_id_seq', start=25430, schema='collection_data'), primary_key=True, server_default=text( - "nextval('augur_data.pull_request_assignees_pr_assignee_map_id_seq'::regclass)" + "nextval('collection_data.pull_request_assignees_pr_assignee_map_id_seq'::regclass)" ), ) pull_request_id = Column( ForeignKey( - "augur_data.pull_requests.pull_request_id", + "collection_data.pull_requests.pull_request_id", ondelete="CASCADE", onupdate="CASCADE", ) ) repo_id = Column( ForeignKey( - "augur_data.repo.repo_id", + "collection_data.repo.repo_id", ondelete="RESTRICT", onupdate="CASCADE", deferrable=True, initially="DEFERRED", ) ) - contrib_id = Column(ForeignKey("augur_data.contributors.cntrb_id")) + contrib_id = Column(ForeignKey("collection_data.contributors.cntrb_id")) pr_assignee_src_id = Column(BigInteger) tool_source = Column(String) tool_version = Column(String) @@ -2997,18 +2997,18 @@ class PullRequestCommit(Base): Sequence('pull_request_commits_pr_cmt_id_seq', start=1, schema='collection_data'), primary_key=True, server_default=text( - "nextval('augur_data.pull_request_commits_pr_cmt_id_seq'::regclass)" + "nextval('collection_data.pull_request_commits_pr_cmt_id_seq'::regclass)" ), ) pull_request_id = Column( ForeignKey( - "augur_data.pull_requests.pull_request_id", + "collection_data.pull_requests.pull_request_id", 
ondelete="CASCADE", onupdate="CASCADE", ) ) repo_id = Column( - ForeignKey("augur_data.repo.repo_id", ondelete="RESTRICT", onupdate="CASCADE") + ForeignKey("collection_data.repo.repo_id", ondelete="RESTRICT", onupdate="CASCADE") ) pr_cmt_sha = Column( String, @@ -3019,7 +3019,7 @@ class PullRequestCommit(Base): pr_cmt_comments_url = Column(String) pr_cmt_author_cntrb_id = Column( ForeignKey( - "augur_data.contributors.cntrb_id", ondelete="CASCADE", onupdate="CASCADE" + "collection_data.contributors.cntrb_id", ondelete="CASCADE", onupdate="CASCADE" ) ) pr_cmt_timestamp = Column(TIMESTAMP(precision=0)) @@ -3052,12 +3052,12 @@ class PullRequestEvent(Base): Sequence('pull_request_events_pr_event_id_seq', start=25430, schema='collection_data'), primary_key=True, server_default=text( - "nextval('augur_data.pull_request_events_pr_event_id_seq'::regclass)" + "nextval('collection_data.pull_request_events_pr_event_id_seq'::regclass)" ), ) pull_request_id = Column( ForeignKey( - "augur_data.pull_requests.pull_request_id", + "collection_data.pull_requests.pull_request_id", ondelete="CASCADE", onupdate="CASCADE", ), @@ -3065,7 +3065,7 @@ class PullRequestEvent(Base): ) repo_id = Column( ForeignKey( - "augur_data.repo.repo_id", + "collection_data.repo.repo_id", ondelete="RESTRICT", onupdate="RESTRICT", deferrable=True, @@ -3073,7 +3073,7 @@ class PullRequestEvent(Base): ) ) cntrb_id = Column( - ForeignKey("augur_data.contributors.cntrb_id") + ForeignKey("collection_data.contributors.cntrb_id") ) action = Column(String, nullable=False) action_commit_hash = Column(String) @@ -3091,7 +3091,7 @@ class PullRequestEvent(Base): node_url = Column(String) platform_id = Column( ForeignKey( - "augur_data.platform.pltfrm_id", + "collection_data.platform.pltfrm_id", ondelete="RESTRICT", onupdate="RESTRICT", deferrable=True, @@ -3152,19 +3152,19 @@ class PullRequestFile(Base): Sequence('pull_request_files_pr_file_id_seq', start=25150, schema='collection_data'), primary_key=True, 
server_default=text( - "nextval('augur_data.pull_request_files_pr_file_id_seq'::regclass)" + "nextval('collection_data.pull_request_files_pr_file_id_seq'::regclass)" ), ) pull_request_id = Column( ForeignKey( - "augur_data.pull_requests.pull_request_id", + "collection_data.pull_requests.pull_request_id", ondelete="CASCADE", onupdate="CASCADE", ) ) repo_id = Column( ForeignKey( - "augur_data.repo.repo_id", + "collection_data.repo.repo_id", ondelete="RESTRICT", onupdate="CASCADE", deferrable=True, @@ -3204,18 +3204,18 @@ class PullRequestLabel(Base): Sequence('pull_request_labels_pr_label_id_seq', start=25430, schema='collection_data'), primary_key=True, server_default=text( - "nextval('augur_data.pull_request_labels_pr_label_id_seq'::regclass)" + "nextval('collection_data.pull_request_labels_pr_label_id_seq'::regclass)" ), ) pull_request_id = Column( ForeignKey( - "augur_data.pull_requests.pull_request_id", + "collection_data.pull_requests.pull_request_id", ondelete="CASCADE", onupdate="CASCADE", ) ) repo_id = Column( - ForeignKey("augur_data.repo.repo_id", ondelete="RESTRICT", onupdate="CASCADE") + ForeignKey("collection_data.repo.repo_id", ondelete="RESTRICT", onupdate="CASCADE") ) pr_src_id = Column(BigInteger) pr_src_node_id = Column(String) @@ -3266,12 +3266,12 @@ class PullRequestMessageRef(Base): Sequence('pull_request_message_ref_pr_msg_ref_id_seq', start=25430, schema='collection_data'), primary_key=True, server_default=text( - "nextval('augur_data.pull_request_message_ref_pr_msg_ref_id_seq'::regclass)" + "nextval('collection_data.pull_request_message_ref_pr_msg_ref_id_seq'::regclass)" ), ) pull_request_id = Column( ForeignKey( - "augur_data.pull_requests.pull_request_id", + "collection_data.pull_requests.pull_request_id", ondelete="CASCADE", onupdate="CASCADE", deferrable=True, @@ -3279,11 +3279,11 @@ class PullRequestMessageRef(Base): ) ) repo_id = Column( - ForeignKey("augur_data.repo.repo_id", ondelete="RESTRICT", onupdate="CASCADE") + 
ForeignKey("collection_data.repo.repo_id", ondelete="RESTRICT", onupdate="CASCADE") ) msg_id = Column( ForeignKey( - "augur_data.message.msg_id", + "collection_data.message.msg_id", ondelete="RESTRICT", onupdate="CASCADE", deferrable=True, @@ -3319,19 +3319,19 @@ class PullRequestMeta(Base): Sequence('pull_request_meta_pr_repo_meta_id_seq', start=25430, schema='collection_data'), primary_key=True, server_default=text( - "nextval('augur_data.pull_request_meta_pr_repo_meta_id_seq'::regclass)" + "nextval('collection_data.pull_request_meta_pr_repo_meta_id_seq'::regclass)" ), ) pull_request_id = Column( ForeignKey( - "augur_data.pull_requests.pull_request_id", + "collection_data.pull_requests.pull_request_id", ondelete="CASCADE", onupdate="CASCADE", ) ) repo_id = Column( ForeignKey( - "augur_data.repo.repo_id", + "collection_data.repo.repo_id", ondelete="RESTRICT", onupdate="CASCADE", deferrable=True, @@ -3348,7 +3348,7 @@ class PullRequestMeta(Base): ) pr_src_meta_ref = Column(String) pr_sha = Column(String) - cntrb_id = Column(ForeignKey("augur_data.contributors.cntrb_id")) + cntrb_id = Column(ForeignKey("collection_data.contributors.cntrb_id")) tool_source = Column(String) tool_version = Column(String) data_source = Column(String) @@ -3392,12 +3392,12 @@ class PullRequestReviewer(Base): Sequence('pull_request_reviewers_pr_reviewer_map_id_seq', start=25430, schema='collection_data'), primary_key=True, server_default=text( - "nextval('augur_data.pull_request_reviewers_pr_reviewer_map_id_seq'::regclass)" + "nextval('collection_data.pull_request_reviewers_pr_reviewer_map_id_seq'::regclass)" ), ) pull_request_id = Column( ForeignKey( - "augur_data.pull_requests.pull_request_id", + "collection_data.pull_requests.pull_request_id", ondelete="CASCADE", onupdate="CASCADE", ) @@ -3409,7 +3409,7 @@ class PullRequestReviewer(Base): repo_id = Column(BigInteger) cntrb_id = Column( ForeignKey( - "augur_data.contributors.cntrb_id", ondelete="CASCADE", onupdate="CASCADE" + 
"collection_data.contributors.cntrb_id", ondelete="CASCADE", onupdate="CASCADE" ), ) pr_reviewer_src_id = Column( @@ -3454,23 +3454,23 @@ class PullRequestReview(Base): Sequence('pull_request_reviews_pr_review_id_seq', start=1, schema='collection_data'), primary_key=True, server_default=text( - "nextval('augur_data.pull_request_reviews_pr_review_id_seq'::regclass)" + "nextval('collection_data.pull_request_reviews_pr_review_id_seq'::regclass)" ), ) pull_request_id = Column( ForeignKey( - "augur_data.pull_requests.pull_request_id", + "collection_data.pull_requests.pull_request_id", ondelete="CASCADE", onupdate="CASCADE", ), nullable=False, ) repo_id = Column( - ForeignKey("augur_data.repo.repo_id", ondelete="RESTRICT", onupdate="CASCADE") + ForeignKey("collection_data.repo.repo_id", ondelete="RESTRICT", onupdate="CASCADE") ) cntrb_id = Column( ForeignKey( - "augur_data.contributors.cntrb_id", ondelete="RESTRICT", onupdate="CASCADE" + "collection_data.contributors.cntrb_id", ondelete="RESTRICT", onupdate="CASCADE" ), nullable=False, ) @@ -3485,7 +3485,7 @@ class PullRequestReview(Base): pr_review_commit_id = Column(String) platform_id = Column( ForeignKey( - "augur_data.platform.pltfrm_id", + "collection_data.platform.pltfrm_id", ondelete="RESTRICT", onupdate="CASCADE", deferrable=True, @@ -3522,12 +3522,12 @@ class PullRequestTeam(Base): Sequence('pull_request_teams_pr_team_id_seq', start=25430, schema='collection_data'), primary_key=True, server_default=text( - "nextval('augur_data.pull_request_teams_pr_team_id_seq'::regclass)" + "nextval('collection_data.pull_request_teams_pr_team_id_seq'::regclass)" ), ) pull_request_id = Column( ForeignKey( - "augur_data.pull_requests.pull_request_id", + "collection_data.pull_requests.pull_request_id", ondelete="CASCADE", onupdate="CASCADE", ) @@ -3566,12 +3566,12 @@ class PullRequestRepo(Base): Sequence('pull_request_repo_pr_repo_id_seq', start=25430, schema='collection_data'), primary_key=True, server_default=text( - 
"nextval('augur_data.pull_request_repo_pr_repo_id_seq'::regclass)" + "nextval('collection_data.pull_request_repo_pr_repo_id_seq'::regclass)" ), ) pr_repo_meta_id = Column( ForeignKey( - "augur_data.pull_request_meta.pr_repo_meta_id", + "collection_data.pull_request_meta.pr_repo_meta_id", ondelete="CASCADE", onupdate="CASCADE", ) @@ -3585,7 +3585,7 @@ class PullRequestRepo(Base): pr_repo_name = Column(String) pr_repo_full_name = Column(String) pr_repo_private_bool = Column(Boolean) - pr_cntrb_id = Column(ForeignKey("augur_data.contributors.cntrb_id")) + pr_cntrb_id = Column(ForeignKey("collection_data.contributors.cntrb_id")) tool_source = Column(String) tool_version = Column(String) data_source = Column(String) @@ -3609,12 +3609,12 @@ class PullRequestReviewMessageRef(Base): Sequence('pull_request_review_message_ref_pr_review_msg_ref_id_seq', start=1, schema='collection_data'), primary_key=True, server_default=text( - "nextval('augur_data.pull_request_review_message_ref_pr_review_msg_ref_id_seq'::regclass)" + "nextval('collection_data.pull_request_review_message_ref_pr_review_msg_ref_id_seq'::regclass)" ), ) pr_review_id = Column( ForeignKey( - "augur_data.pull_request_reviews.pr_review_id", + "collection_data.pull_request_reviews.pr_review_id", ondelete="RESTRICT", onupdate="CASCADE", deferrable=True, @@ -3624,7 +3624,7 @@ class PullRequestReviewMessageRef(Base): ) repo_id = Column( ForeignKey( - "augur_data.repo.repo_id", + "collection_data.repo.repo_id", ondelete="RESTRICT", onupdate="CASCADE", deferrable=True, @@ -3633,7 +3633,7 @@ class PullRequestReviewMessageRef(Base): ) msg_id = Column( ForeignKey( - "augur_data.message.msg_id", + "collection_data.message.msg_id", ondelete="RESTRICT", onupdate="CASCADE", deferrable=True, @@ -3682,12 +3682,12 @@ class RepoClone(Base): Sequence('repo_clones_data_id_seq', start=1, schema='collection_data'), primary_key=True, server_default=text( - "nextval('augur_data.repo_clones_data_id_seq'::regclass)" + 
"nextval('collection_data.repo_clones_data_id_seq'::regclass)" ), ) repo_id = Column( ForeignKey( - "augur_data.repo.repo_id", + "collection_data.repo.repo_id", ondelete="RESTRICT", onupdate="CASCADE", deferrable=True, @@ -3713,7 +3713,7 @@ class TopicModelMeta(Base): comment="Unique identifier for the topic model" ) repo_id = Column( - ForeignKey("augur_data.repo.repo_id"), + ForeignKey("collection_data.repo.repo_id"), comment="Repository this model was trained on" ) model_method = Column( @@ -3827,14 +3827,14 @@ class TopicModelEvent(Base): ) repo_id = Column( Integer, - ForeignKey("augur_data.repo.repo_id", name="fk_tme_repo_id"), + ForeignKey("collection_data.repo.repo_id", name="fk_tme_repo_id"), nullable=True, comment="Repository associated with this event" ) model_id = Column( UUID(as_uuid=True), ForeignKey( - "augur_data.topic_model_meta.model_id", + "collection_data.topic_model_meta.model_id", name="fk_tme_model_id", ondelete="SET NULL" ), From cea59b8e0a514a2a031acebd1ab9ac7739c4e014 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 27 May 2026 15:17:28 -0400 Subject: [PATCH 06/26] rename all references to augur_data in the rest of the codebase Signed-off-by: Adrian Edwards --- collectoss/api/metrics/commit.py | 2 +- collectoss/api/metrics/deps.py | 32 ++-- collectoss/api/metrics/message.py | 20 +-- collectoss/api/metrics/pull_request.py | 6 +- collectoss/api/metrics/repo_meta.py | 12 +- collectoss/api/metrics/toss.py | 2 +- collectoss/api/routes/collection_status.py | 18 +-- collectoss/api/routes/complexity.py | 144 +++++++++--------- collectoss/api/routes/metadata.py | 2 +- collectoss/application/cli/backend.py | 2 +- collectoss/application/cli/collection.py | 2 +- collectoss/application/cli/db.py | 4 +- .../data_analysis/clustering_worker/tasks.py | 26 ++-- .../data_analysis/discourse_analysis/tasks.py | 8 +- .../data_analysis/message_insights/tasks.py | 54 +++---- .../pull_request_analysis_worker/tasks.py | 22 +-- 
.../tasks/db/refresh_materialized_views.py | 28 ++-- .../tasks/github/facade_github/tasks.py | 12 +- collectoss/tasks/util/collection_util.py | 2 +- collectoss/util/repo_load_controller.py | 4 +- conftest.py | 2 +- .../test_application/test_db/test_session.py | 24 +-- .../test_github_tasks/test_pull_requests.py | 10 +- .../test_endpoints.py | 6 +- 24 files changed, 222 insertions(+), 222 deletions(-) diff --git a/collectoss/api/metrics/commit.py b/collectoss/api/metrics/commit.py index de2c84809..3b55a1520 100644 --- a/collectoss/api/metrics/commit.py +++ b/collectoss/api/metrics/commit.py @@ -231,7 +231,7 @@ def annual_commit_count_ranked_by_repo_in_repo_group(repo_group_id, repo_id=None if timeframe == 'all': cdRgTpRankedCommitsSQL = s.sql.text(""" SELECT repo.repo_id, repo_name as name, SUM(added - removed - whitespace) as net, patches - FROM augur_data.dm_repo_annual, repo, repo_groups + FROM collection_data.dm_repo_annual, repo, repo_groups WHERE repo.repo_group_id = :repo_group_id AND repo.repo_group_id = repo_groups.repo_group_id AND dm_repo_annual.repo_id = repo.repo_id diff --git a/collectoss/api/metrics/deps.py b/collectoss/api/metrics/deps.py index ef13aee7d..61f34092f 100644 --- a/collectoss/api/metrics/deps.py +++ b/collectoss/api/metrics/deps.py @@ -33,13 +33,13 @@ def deps(repo_group_id, repo_id=None, period='day', begin_date=None, end_date=No depsSQL = s.sql.text(""" SELECT - augur_data.repo_dependencies.*, - augur_data.repo_groups.repo_group_id + collection_data.repo_dependencies.*, + collection_data.repo_groups.repo_group_id FROM - augur_data.repo_dependencies, - augur_data.repo_groups, - augur_data.repo, - ( SELECT MAX ( date_trunc( 'day', augur_data.repo_dependencies.data_collection_date ) ) AS data_collection_date FROM repo_dependencies WHERE repo_id = repo_id ) C + collection_data.repo_dependencies, + collection_data.repo_groups, + collection_data.repo, + ( SELECT MAX ( date_trunc( 'day', collection_data.repo_dependencies.data_collection_date 
) ) AS data_collection_date FROM repo_dependencies WHERE repo_id = repo_id ) C WHERE repo_dependencies.repo_id = repo.repo_id AND repo.repo_group_id = repo_groups.repo_group_id @@ -54,13 +54,13 @@ def deps(repo_group_id, repo_id=None, period='day', begin_date=None, end_date=No depsSQL = s.sql.text(""" SELECT - augur_data.repo_dependencies.*, - augur_data.repo_groups.repo_group_id + collection_data.repo_dependencies.*, + collection_data.repo_groups.repo_group_id FROM - augur_data.repo_dependencies, - augur_data.repo_groups, - augur_data.repo, - ( SELECT MAX ( date_trunc( 'day', augur_data.repo_dependencies.data_collection_date ) ) AS data_collection_date + collection_data.repo_dependencies, + collection_data.repo_groups, + collection_data.repo, + ( SELECT MAX ( date_trunc( 'day', collection_data.repo_dependencies.data_collection_date ) ) AS data_collection_date FROM repo_dependencies, repo, repo_groups WHERE repo.repo_group_id = repo_groups.repo_group_id and repo_dependencies.repo_id = repo.repo_id and @@ -134,8 +134,8 @@ def libyear(repo_group_id, repo_id=None, period='day', begin_date=None, end_date f.libyear, f.data_collection_date FROM - ( SELECT repo_id, NAME, MAX ( data_collection_date ) AS data_collection_date FROM augur_data.repo_deps_libyear WHERE repo_id = :repo_id GROUP BY repo_id, NAME ORDER BY NAME ) e, - augur_data.repo_deps_libyear f + ( SELECT repo_id, NAME, MAX ( data_collection_date ) AS data_collection_date FROM collection_data.repo_deps_libyear WHERE repo_id = :repo_id GROUP BY repo_id, NAME ORDER BY NAME ) e, + collection_data.repo_deps_libyear f WHERE e.data_collection_date = f.data_collection_date and e.repo_id = f.repo_id @@ -203,8 +203,8 @@ def libyear(repo_group_id, repo_id=None, period='day', begin_date=None, end_date f.libyear, f.data_collection_date FROM - ( SELECT repo_id, NAME, MAX ( data_collection_date ) AS data_collection_date FROM augur_data.repo_deps_libyear GROUP BY repo_id, NAME ORDER BY NAME ) e, - augur_data.repo_deps_libyear 
f + ( SELECT repo_id, NAME, MAX ( data_collection_date ) AS data_collection_date FROM collection_data.repo_deps_libyear GROUP BY repo_id, NAME ORDER BY NAME ) e, + collection_data.repo_deps_libyear f WHERE e.data_collection_date = f.data_collection_date and e.repo_id = f.repo_id diff --git a/collectoss/api/metrics/message.py b/collectoss/api/metrics/message.py index f76aabd28..26ce99cf3 100644 --- a/collectoss/api/metrics/message.py +++ b/collectoss/api/metrics/message.py @@ -40,12 +40,12 @@ def repo_messages(repo_group_id, repo_id=None, period='day', begin_date=None, en COUNT ( * ), repo_name FROM - augur_data.repo, - augur_data.message + collection_data.repo, + collection_data.message WHERE - augur_data.repo.repo_id = augur_data.message.repo_id + collection_data.repo.repo_id = collection_data.message.repo_id AND - augur_data.repo.repo_id = :repo_id + collection_data.repo.repo_id = :repo_id AND message.msg_timestamp BETWEEN :begin_date AND :end_date GROUP BY @@ -69,14 +69,14 @@ def repo_messages(repo_group_id, repo_id=None, period='day', begin_date=None, en COUNT ( * ), rg_name FROM - augur_data.repo, - augur_data.repo_groups, - augur_data.message + collection_data.repo, + collection_data.repo_groups, + collection_data.message WHERE - augur_data.repo.repo_id = augur_data.message.repo_id - AND augur_data.repo_groups.repo_group_id = repo.repo_group_id + collection_data.repo.repo_id = collection_data.message.repo_id + AND collection_data.repo_groups.repo_group_id = repo.repo_group_id AND - augur_data.repo_groups.repo_group_id = :repo_group_id + collection_data.repo_groups.repo_group_id = :repo_group_id AND message.msg_timestamp BETWEEN :begin_date AND :end_date GROUP BY diff --git a/collectoss/api/metrics/pull_request.py b/collectoss/api/metrics/pull_request.py index 20d6be893..8516ec999 100644 --- a/collectoss/api/metrics/pull_request.py +++ b/collectoss/api/metrics/pull_request.py @@ -787,8 +787,8 @@ def pull_request_average_commit_counts(repo_group_id, 
repo_id=None, group_by='mo pr_merged_at, pr_closed_at, pr_created_at - FROM augur_data.pull_request_commits, augur_data.pull_request_meta,augur_data.repo_groups, - augur_data.pull_requests JOIN repo ON pull_requests.repo_id = repo.repo_id + FROM collection_data.pull_request_commits, collection_data.pull_request_meta,collection_data.repo_groups, + collection_data.pull_requests JOIN repo ON pull_requests.repo_id = repo.repo_id WHERE pull_requests.repo_id IN (SELECT repo_id FROM repo WHERE repo_group_id = :repo_group_id) AND pull_requests.pull_request_id = pull_request_commits.pull_request_id @@ -821,7 +821,7 @@ def pull_request_average_commit_counts(repo_group_id, repo_id=None, group_by='mo pr_merged_at, pr_closed_at, pr_created_at - FROM augur_data.pull_request_commits, augur_data.pull_requests, augur_data.pull_request_meta + FROM collection_data.pull_request_commits, collection_data.pull_requests, collection_data.pull_request_meta WHERE pull_requests.pull_request_id = pull_request_commits.pull_request_id AND pull_requests.pull_request_id = pull_request_meta.pull_request_id AND pull_requests.repo_id = :repo_id diff --git a/collectoss/api/metrics/repo_meta.py b/collectoss/api/metrics/repo_meta.py index 7c4129081..a609066ab 100644 --- a/collectoss/api/metrics/repo_meta.py +++ b/collectoss/api/metrics/repo_meta.py @@ -190,7 +190,7 @@ def sbom_download(repo_group_id, repo_id=None): :return: dosocs sbom """ dosocs_SQL = s.sql.text(""" - select * from augur_data.repo_sbom_scans + select * from collection_data.repo_sbom_scans where repo_id = :repo_id; """) @@ -313,7 +313,7 @@ def cii_best_practices_badge(repo_group_id, repo_id=None): if not repo_id: cii_best_practices_badge_SQL = s.sql.text(""" SELECT data - FROM augur_data.repo_badging + FROM collection_data.repo_badging WHERE repo_id IN (SELECT repo_id FROM repo WHERE repo_group_id = :repo_group_id) ORDER BY created_at DESC LIMIT 1 @@ -321,7 +321,7 @@ def cii_best_practices_badge(repo_group_id, repo_id=None): else: 
cii_best_practices_badge_SQL = s.sql.text(""" SELECT data - FROM augur_data.repo_badging + FROM collection_data.repo_badging WHERE repo_id = :repo_id ORDER BY created_at DESC LIMIT 1 @@ -1270,7 +1270,7 @@ def clones(repo_group_id, repo_id=None, begin_date=None, end_date=None): clone_data_timestamp AS date, count_clones AS total_clones, unique_clones - FROM augur_data.repo_clones_data + FROM collection_data.repo_clones_data WHERE repo_id = :repo_id AND clone_data_timestamp BETWEEN :begin_date AND :end_date ORDER BY clone_data_timestamp @@ -1289,9 +1289,9 @@ def clones(repo_group_id, repo_id=None, begin_date=None, end_date=None): clone_data_timestamp AS date, count_clones AS total_clones, unique_clones - FROM augur_data.repo_clones_data + FROM collection_data.repo_clones_data WHERE repo_id IN ( - SELECT repo_id FROM augur_data.repo WHERE repo_group_id = :repo_group_id + SELECT repo_id FROM collection_data.repo WHERE repo_group_id = :repo_group_id ) AND clone_data_timestamp BETWEEN :begin_date AND :end_date ORDER BY repo_id, clone_data_timestamp diff --git a/collectoss/api/metrics/toss.py b/collectoss/api/metrics/toss.py index 698b4cf31..69597da66 100644 --- a/collectoss/api/metrics/toss.py +++ b/collectoss/api/metrics/toss.py @@ -114,7 +114,7 @@ def toss_repo_info(repo_id): repo_info.default_branch, repo.repo_git FROM - augur_data.repo_info + collection_data.repo_info JOIN repo ON repo.repo_id = repo_info.repo_id WHERE repo_info.repo_id = :repo_id diff --git a/collectoss/api/routes/collection_status.py b/collectoss/api/routes/collection_status.py index eaa374f4c..ba8373440 100644 --- a/collectoss/api/routes/collection_status.py +++ b/collectoss/api/routes/collection_status.py @@ -61,10 +61,10 @@ def issue_collection_status(): # TODO: make this name automatic - wrapper? 
( CAST (( COUNT ( * )) +1 AS DOUBLE PRECISION ) / CAST ( b.issues_count + 1 AS DOUBLE PRECISION )) AS ratio_issues FROM - augur_data.repo A, - augur_data.issues d, - augur_data.repo_info b, - ( SELECT repo_id, MAX ( data_collection_date ) AS last_collected FROM augur_data.repo_info GROUP BY repo_id ORDER BY repo_id ) e, + collection_data.repo A, + collection_data.issues d, + collection_data.repo_info b, + ( SELECT repo_id, MAX ( data_collection_date ) AS last_collected FROM collection_data.repo_info GROUP BY repo_id ORDER BY repo_id ) e, ( SELECT repo_id, MAX ( data_collection_date ) AS most_recently_collected_issue FROM issues GROUP BY repo_id ORDER BY repo_id ) f WHERE A.repo_id = b.repo_id @@ -135,11 +135,11 @@ def pull_request_collection_status(): # TODO: make this name automatic - wrappe ABS ( CAST ( ( COUNT ( * ) ) + 1 AS DOUBLE PRECISION ) / CAST ( b.pull_request_count + 1 AS DOUBLE PRECISION ) ) AS ratio_abs, ( CAST ( ( COUNT ( * ) ) + 1 AS DOUBLE PRECISION ) / CAST ( b.pull_request_count + 1 AS DOUBLE PRECISION ) ) AS ratio_issues FROM - augur_data.repo A, - augur_data.pull_requests d, - augur_data.repo_info b, - ( SELECT repo_id, MAX ( data_collection_date ) AS last_collected FROM augur_data.repo_info GROUP BY repo_id ORDER BY repo_id ) e, - ( SELECT repo_id, MAX ( data_collection_date ) AS last_pr_collected FROM augur_data.pull_requests GROUP BY repo_id ORDER BY repo_id ) f + collection_data.repo A, + collection_data.pull_requests d, + collection_data.repo_info b, + ( SELECT repo_id, MAX ( data_collection_date ) AS last_collected FROM collection_data.repo_info GROUP BY repo_id ORDER BY repo_id ) e, + ( SELECT repo_id, MAX ( data_collection_date ) AS last_pr_collected FROM collection_data.pull_requests GROUP BY repo_id ORDER BY repo_id ) f WHERE A.repo_id = b.repo_id AND LOWER ( A.repo_git ) LIKE'%github.com%' diff --git a/collectoss/api/routes/complexity.py b/collectoss/api/routes/complexity.py index 11fbf5ebe..1b1a2c6a5 100644 --- 
a/collectoss/api/routes/complexity.py +++ b/collectoss/api/routes/complexity.py @@ -17,13 +17,13 @@ def get_project_languages(): project_languages_sql = s.sql.text(""" SELECT e.repo_id, - augur_data.repo.repo_git, - augur_data.repo.repo_name, + collection_data.repo.repo_git, + collection_data.repo.repo_name, e.programming_language, e.code_lines, e.files FROM - augur_data.repo, + collection_data.repo, (SELECT d.repo_id, d.programming_language, @@ -31,22 +31,22 @@ def get_project_languages(): COUNT(*)::int AS files FROM (SELECT - augur_data.repo_labor.repo_id, - augur_data.repo_labor.programming_language, - augur_data.repo_labor.code_lines + collection_data.repo_labor.repo_id, + collection_data.repo_labor.programming_language, + collection_data.repo_labor.code_lines FROM - augur_data.repo_labor, + collection_data.repo_labor, ( SELECT - augur_data.repo_labor.repo_id, + collection_data.repo_labor.repo_id, MAX ( data_collection_date ) AS last_collected FROM - augur_data.repo_labor - GROUP BY augur_data.repo_labor.repo_id) recent + collection_data.repo_labor + GROUP BY collection_data.repo_labor.repo_id) recent WHERE - augur_data.repo_labor.repo_id = recent.repo_id - AND augur_data.repo_labor.data_collection_date > recent.last_collected - (5 * interval '1 minute')) d + collection_data.repo_labor.repo_id = recent.repo_id + AND collection_data.repo_labor.data_collection_date > recent.last_collected - (5 * interval '1 minute')) d GROUP BY d.repo_id, d.programming_language) e - WHERE augur_data.repo.repo_id = e.repo_id + WHERE collection_data.repo.repo_id = e.repo_id ORDER BY e.repo_id """) @@ -62,30 +62,30 @@ def get_project_files(): project_files_sql = s.sql.text(""" SELECT e.repo_id, - augur_data.repo.repo_git, - augur_data.repo.repo_name, + collection_data.repo.repo_git, + collection_data.repo.repo_name, e.files FROM - augur_data.repo, + collection_data.repo, (SELECT d.repo_id, count(*) AS files FROM (SELECT - augur_data.repo_labor.repo_id + 
collection_data.repo_labor.repo_id FROM - augur_data.repo_labor, + collection_data.repo_labor, ( SELECT - augur_data.repo_labor.repo_id, + collection_data.repo_labor.repo_id, MAX ( data_collection_date ) AS last_collected FROM - augur_data.repo_labor - GROUP BY augur_data.repo_labor.repo_id) recent + collection_data.repo_labor + GROUP BY collection_data.repo_labor.repo_id) recent WHERE - augur_data.repo_labor.repo_id = recent.repo_id - AND augur_data.repo_labor.data_collection_date > recent.last_collected - (5 * interval '1 minute')) d + collection_data.repo_labor.repo_id = recent.repo_id + AND collection_data.repo_labor.data_collection_date > recent.last_collected - (5 * interval '1 minute')) d GROUP BY d.repo_id) e - WHERE augur_data.repo.repo_id = e.repo_id + WHERE collection_data.repo.repo_id = e.repo_id ORDER BY e.repo_id """) @@ -103,33 +103,33 @@ def get_project_lines(): project_lines_sql = s.sql.text(""" SELECT e.repo_id, - augur_data.repo.repo_git, - augur_data.repo.repo_name, + collection_data.repo.repo_git, + collection_data.repo.repo_name, e.total_lines, e.average_lines FROM - augur_data.repo, + collection_data.repo, (SELECT d.repo_id, SUM(d.total_lines) AS total_lines, AVG(d.total_lines)::INT AS average_lines FROM (SELECT - augur_data.repo_labor.repo_id, - augur_data.repo_labor.total_lines + collection_data.repo_labor.repo_id, + collection_data.repo_labor.total_lines FROM - augur_data.repo_labor, + collection_data.repo_labor, ( SELECT - augur_data.repo_labor.repo_id, + collection_data.repo_labor.repo_id, MAX ( data_collection_date ) AS last_collected FROM - augur_data.repo_labor - GROUP BY augur_data.repo_labor.repo_id) recent + collection_data.repo_labor + GROUP BY collection_data.repo_labor.repo_id) recent WHERE - augur_data.repo_labor.repo_id = recent.repo_id - AND augur_data.repo_labor.data_collection_date > recent.last_collected - (5 * interval '1 minute')) d + collection_data.repo_labor.repo_id = recent.repo_id + AND 
collection_data.repo_labor.data_collection_date > recent.last_collected - (5 * interval '1 minute')) d GROUP BY d.repo_id) e - WHERE augur_data.repo.repo_id = e.repo_id and augur_data.repo.repo_id = :repo_id_param + WHERE collection_data.repo.repo_id = e.repo_id and collection_data.repo.repo_id = :repo_id_param ORDER BY e.repo_id """).bindparams(repo_id_param=repo_id) @@ -147,33 +147,33 @@ def get_project_comment_lines(): comment_lines_sql = s.sql.text(""" SELECT e.repo_id, - augur_data.repo.repo_git, - augur_data.repo.repo_name, + collection_data.repo.repo_git, + collection_data.repo.repo_name, e.comment_lines, e.avg_comment_lines FROM - augur_data.repo, + collection_data.repo, (SELECT d.repo_id, SUM(d.comment_lines) AS comment_lines, AVG(d.comment_lines)::INT AS avg_comment_lines FROM (SELECT - augur_data.repo_labor.repo_id, - augur_data.repo_labor.comment_lines + collection_data.repo_labor.repo_id, + collection_data.repo_labor.comment_lines FROM - augur_data.repo_labor, + collection_data.repo_labor, ( SELECT - augur_data.repo_labor.repo_id, + collection_data.repo_labor.repo_id, MAX ( data_collection_date ) AS last_collected FROM - augur_data.repo_labor - GROUP BY augur_data.repo_labor.repo_id) recent + collection_data.repo_labor + GROUP BY collection_data.repo_labor.repo_id) recent WHERE - augur_data.repo_labor.repo_id = recent.repo_id - AND augur_data.repo_labor.data_collection_date > recent.last_collected - (5 * interval '1 minute')) d + collection_data.repo_labor.repo_id = recent.repo_id + AND collection_data.repo_labor.data_collection_date > recent.last_collected - (5 * interval '1 minute')) d GROUP BY d.repo_id) e - WHERE augur_data.repo.repo_id = e.repo_id + WHERE collection_data.repo.repo_id = e.repo_id AND e.repo_id = :repo_id_param ORDER BY e.repo_id """).bindparams(repo_id_param=repo_id) @@ -192,33 +192,33 @@ def get_project_blank_lines(): blank_lines_sql = s.sql.text(""" SELECT e.repo_id, - augur_data.repo.repo_git, - augur_data.repo.repo_name, + 
collection_data.repo.repo_git, + collection_data.repo.repo_name, e.blank_lines, e.avg_blank_lines FROM - augur_data.repo, + collection_data.repo, (SELECT d.repo_id, SUM(d.blank_lines) AS blank_lines, AVG(d.blank_lines)::int AS avg_blank_lines FROM (SELECT - augur_data.repo_labor.repo_id, - augur_data.repo_labor.blank_lines + collection_data.repo_labor.repo_id, + collection_data.repo_labor.blank_lines FROM - augur_data.repo_labor, + collection_data.repo_labor, ( SELECT - augur_data.repo_labor.repo_id, + collection_data.repo_labor.repo_id, MAX ( data_collection_date ) AS last_collected FROM - augur_data.repo_labor - GROUP BY augur_data.repo_labor.repo_id) recent + collection_data.repo_labor + GROUP BY collection_data.repo_labor.repo_id) recent WHERE - augur_data.repo_labor.repo_id = recent.repo_id - AND augur_data.repo_labor.data_collection_date > recent.last_collected - (5 * interval '1 minute')) d + collection_data.repo_labor.repo_id = recent.repo_id + AND collection_data.repo_labor.data_collection_date > recent.last_collected - (5 * interval '1 minute')) d GROUP BY d.repo_id) e - WHERE augur_data.repo.repo_id = e.repo_id + WHERE collection_data.repo.repo_id = e.repo_id AND e.repo_id = :repo_id_param ORDER BY e.repo_id """).bindparams(repo_id_param=repo_id) @@ -236,33 +236,33 @@ def get_project_file_complexity(): project_file_complexity_sql = s.sql.text(""" SELECT e.repo_id, - augur_data.repo.repo_git, - augur_data.repo.repo_name, + collection_data.repo.repo_git, + collection_data.repo.repo_name, e.sum_code_complexity, e.average_code_complexity FROM - augur_data.repo, + collection_data.repo, (SELECT d.repo_id, SUM(d.code_complexity) AS sum_code_complexity, AVG(d.code_complexity)::int AS average_code_complexity FROM (SELECT - augur_data.repo_labor.repo_id, - augur_data.repo_labor.code_complexity + collection_data.repo_labor.repo_id, + collection_data.repo_labor.code_complexity FROM - augur_data.repo_labor, + collection_data.repo_labor, ( SELECT - 
augur_data.repo_labor.repo_id, + collection_data.repo_labor.repo_id, MAX ( data_collection_date ) AS last_collected FROM - augur_data.repo_labor - GROUP BY augur_data.repo_labor.repo_id) recent + collection_data.repo_labor + GROUP BY collection_data.repo_labor.repo_id) recent WHERE - augur_data.repo_labor.repo_id = recent.repo_id - AND augur_data.repo_labor.data_collection_date > recent.last_collected - (5 * interval '1 minute')) d + collection_data.repo_labor.repo_id = recent.repo_id + AND collection_data.repo_labor.data_collection_date > recent.last_collected - (5 * interval '1 minute')) d GROUP BY d.repo_id) e - WHERE augur_data.repo.repo_id = e.repo_id + WHERE collection_data.repo.repo_id = e.repo_id ORDER BY e.repo_id """) diff --git a/collectoss/api/routes/metadata.py b/collectoss/api/routes/metadata.py index edd65f595..7b09cfabe 100644 --- a/collectoss/api/routes/metadata.py +++ b/collectoss/api/routes/metadata.py @@ -31,7 +31,7 @@ def get_repo_info(): FROM repo_info, repo, - ( SELECT repo_id, MAX ( data_collection_date ) AS last_collected FROM augur_data.repo_info GROUP BY repo_id ORDER BY repo_id ) e + ( SELECT repo_id, MAX ( data_collection_date ) AS last_collected FROM collection_data.repo_info GROUP BY repo_id ORDER BY repo_id ) e WHERE repo_info.repo_id = repo.repo_id AND e.repo_id = repo_info.repo_id diff --git a/collectoss/application/cli/backend.py b/collectoss/application/cli/backend.py index a07ddf198..f85a4e105 100644 --- a/collectoss/application/cli/backend.py +++ b/collectoss/application/cli/backend.py @@ -386,7 +386,7 @@ def repo_reset(backend_app): UPDATE augur_operations.collection_status SET facade_status='Pending', facade_task_id=NULL, facade_data_last_collected = NULL; - TRUNCATE augur_data.commits CASCADE; + TRUNCATE collection_data.commits CASCADE; """)) logger.info("Repos successfully reset") diff --git a/collectoss/application/cli/collection.py b/collectoss/application/cli/collection.py index b1a93ce80..3f46d10d1 100644 --- 
a/collectoss/application/cli/collection.py +++ b/collectoss/application/cli/collection.py @@ -211,7 +211,7 @@ def repo_reset(ctx): UPDATE augur_operations.collection_status SET facade_status='Pending', facade_task_id=NULL, facade_data_last_collected = NULL; - TRUNCATE augur_data.commits CASCADE; + TRUNCATE collection_data.commits CASCADE; """)) logger.info("Repos successfully reset") diff --git a/collectoss/application/cli/db.py b/collectoss/application/cli/db.py index 1a790e3c0..6bc475711 100644 --- a/collectoss/application/cli/db.py +++ b/collectoss/application/cli/db.py @@ -140,7 +140,7 @@ def get_repo_groups(ctx: click.Context) -> pd.DataFrame: with ctx.obj.engine.connect() as connection: df = pd.read_sql( s.sql.text( - "SELECT repo_group_id, rg_name, rg_description FROM augur_data.repo_groups" + "SELECT repo_group_id, rg_name, rg_description FROM collection_data.repo_groups" ), connection, ) @@ -179,7 +179,7 @@ def add_repo_groups(ctx: click.Context, filename: str) -> None: with ctx.obj.engine.begin() as connection: # Get existing repo group IDs df = pd.read_sql( - s.sql.text("SELECT repo_group_id FROM augur_data.repo_groups"), + s.sql.text("SELECT repo_group_id FROM collection_data.repo_groups"), connection, ) repo_group_IDs = df["repo_group_id"].values.tolist() diff --git a/collectoss/tasks/data_analysis/clustering_worker/tasks.py b/collectoss/tasks/data_analysis/clustering_worker/tasks.py index c9e269e5f..7ec48414d 100644 --- a/collectoss/tasks/data_analysis/clustering_worker/tasks.py +++ b/collectoss/tasks/data_analysis/clustering_worker/tasks.py @@ -78,10 +78,10 @@ def clustering_model(repo_git: str,logger,engine) -> None: i.issue_title thread_title, M.msg_id FROM - augur_data.repo r, - augur_data.issues i, - augur_data.message M, - augur_data.issue_message_ref imr + collection_data.repo r, + collection_data.issues i, + collection_data.message M, + collection_data.issue_message_ref imr WHERE r.repo_id = i.repo_id AND imr.issue_id = i.issue_id @@ -98,10 
+98,10 @@ def clustering_model(repo_git: str,logger,engine) -> None: pr.pr_src_title thread_title, M.msg_id FROM - augur_data.repo r, - augur_data.pull_requests pr, - augur_data.message M, - augur_data.pull_request_message_ref prmr + collection_data.repo r, + collection_data.pull_requests pr, + collection_data.message M, + collection_data.pull_request_message_ref prmr WHERE r.repo_id = pr.repo_id AND prmr.pull_request_id = pr.pull_request_id @@ -289,15 +289,15 @@ def visualize_labels_PCA(features, labels, annotations, num_components, title): get_messages_sql = s.sql.text( """ SELECT r.repo_group_id, r.repo_id, r.repo_git, r.repo_name, i.issue_id thread_id,m.msg_text,i.issue_title thread_title,m.msg_id - FROM augur_data.repo r, augur_data.issues i, - augur_data.message m, augur_data.issue_message_ref imr + FROM collection_data.repo r, collection_data.issues i, + collection_data.message m, collection_data.issue_message_ref imr WHERE r.repo_id=i.repo_id AND imr.issue_id=i.issue_id AND imr.msg_id=m.msg_id UNION SELECT r.repo_group_id, r.repo_id, r.repo_git, r.repo_name, pr.pull_request_id thread_id,m.msg_text,pr.pr_src_title thread_title,m.msg_id - FROM augur_data.repo r, augur_data.pull_requests pr, - augur_data.message m, augur_data.pull_request_message_ref prmr + FROM collection_data.repo r, collection_data.pull_requests pr, + collection_data.message m, collection_data.pull_request_message_ref prmr WHERE r.repo_id=pr.repo_id AND prmr.pull_request_id=pr.pull_request_id AND prmr.msg_id=m.msg_id @@ -365,7 +365,7 @@ def visualize_labels_PCA(features, labels, annotations, num_components, title): # key_sequence_words_sql = s.sql.text( # """ - # SELECT nextval('augur_data.topic_words_topic_words_id_seq'::text) + # SELECT nextval('collection_data.topic_words_topic_words_id_seq'::text) # """ # ) diff --git a/collectoss/tasks/data_analysis/discourse_analysis/tasks.py b/collectoss/tasks/data_analysis/discourse_analysis/tasks.py index a95756b8c..cad3856ab 100644 --- 
a/collectoss/tasks/data_analysis/discourse_analysis/tasks.py +++ b/collectoss/tasks/data_analysis/discourse_analysis/tasks.py @@ -51,16 +51,16 @@ def discourse_analysis_model(repo_git: str,logger,engine) -> None: get_messages_for_repo_sql = s.sql.text(""" (SELECT r.repo_group_id, r.repo_id, r.repo_git, r.repo_name, i.issue_id thread_id,m.msg_text,i.issue_title thread_title,m.msg_id - FROM augur_data.repo r, augur_data.issues i, - augur_data.message m, augur_data.issue_message_ref imr + FROM collection_data.repo r, collection_data.issues i, + collection_data.message m, collection_data.issue_message_ref imr WHERE r.repo_id=i.repo_id AND imr.issue_id=i.issue_id AND imr.msg_id=m.msg_id AND r.repo_id = :repo_id) UNION (SELECT r.repo_group_id, r.repo_id, r.repo_git, r.repo_name, pr.pull_request_id thread_id,m.msg_text,pr.pr_src_title thread_title,m.msg_id - FROM augur_data.repo r, augur_data.pull_requests pr, - augur_data.message m, augur_data.pull_request_message_ref prmr + FROM collection_data.repo r, collection_data.pull_requests pr, + collection_data.message m, collection_data.pull_request_message_ref prmr WHERE r.repo_id=pr.repo_id AND prmr.pull_request_id=pr.pull_request_id AND prmr.msg_id=m.msg_id diff --git a/collectoss/tasks/data_analysis/message_insights/tasks.py b/collectoss/tasks/data_analysis/message_insights/tasks.py index 7913a5d13..f01de4305 100644 --- a/collectoss/tasks/data_analysis/message_insights/tasks.py +++ b/collectoss/tasks/data_analysis/message_insights/tasks.py @@ -52,7 +52,7 @@ def message_insight_model(repo_git: str,logger,engine) -> None: # Check to see if repo has been analyzed previously repo_exists_SQL = s.sql.text(""" - SELECT exists (SELECT 1 FROM augur_data.message_analysis_summary WHERE repo_id = :repo_id LIMIT 1)""") + SELECT exists (SELECT 1 FROM collection_data.message_analysis_summary WHERE repo_id = :repo_id LIMIT 1)""") with engine.connect() as conn: df_rep = pd.read_sql_query(repo_exists_SQL, conn, params={'repo_id': repo_id}) 
@@ -66,17 +66,17 @@ def message_insight_model(repo_git: str,logger,engine) -> None: # Fetch the timestamp of last analyzed message for the repo past_SQL = s.sql.text(""" select message_analysis.msg_id, message.msg_timestamp - from augur_data.message_analysis - inner join augur_data.message on message.msg_id = message_analysis.msg_id - inner join augur_data.pull_request_message_ref on message.msg_id = pull_request_message_ref.msg_id - inner join augur_data.pull_requests on pull_request_message_ref.pull_request_id = pull_requests.pull_request_id + from collection_data.message_analysis + inner join collection_data.message on message.msg_id = message_analysis.msg_id + inner join collection_data.pull_request_message_ref on message.msg_id = pull_request_message_ref.msg_id + inner join collection_data.pull_requests on pull_request_message_ref.pull_request_id = pull_requests.pull_request_id where message.repo_id = :repo_id UNION select message_analysis.msg_id, message.msg_timestamp - from augur_data.message_analysis - inner join augur_data.message on message.msg_id = message_analysis.msg_id - inner join augur_data.issue_message_ref on message.msg_id = issue_message_ref.msg_id - inner join augur_data.issues on issue_message_ref.issue_id = issues.issue_id + from collection_data.message_analysis + inner join collection_data.message on message.msg_id = message_analysis.msg_id + inner join collection_data.issue_message_ref on message.msg_id = issue_message_ref.msg_id + inner join collection_data.issues on issue_message_ref.issue_id = issues.issue_id where message.repo_id = :repo_id """) @@ -97,28 +97,28 @@ def message_insight_model(repo_git: str,logger,engine) -> None: # Fetch only recent messages join_SQL = s.sql.text(""" - select message.msg_id, msg_timestamp, msg_text from augur_data.message - left outer join augur_data.pull_request_message_ref on message.msg_id = pull_request_message_ref.msg_id - left outer join augur_data.pull_requests on 
pull_request_message_ref.pull_request_id = pull_requests.pull_request_id + select message.msg_id, msg_timestamp, msg_text from collection_data.message + left outer join collection_data.pull_request_message_ref on message.msg_id = pull_request_message_ref.msg_id + left outer join collection_data.pull_requests on pull_request_message_ref.pull_request_id = pull_requests.pull_request_id where message.repo_id = :repo_id and msg_timestamp > :begin_date UNION - select message.msg_id, msg_timestamp, msg_text from augur_data.message - left outer join augur_data.issue_message_ref on message.msg_id = issue_message_ref.msg_id - left outer join augur_data.issues on issue_message_ref.issue_id = issues.issue_id + select message.msg_id, msg_timestamp, msg_text from collection_data.message + left outer join collection_data.issue_message_ref on message.msg_id = issue_message_ref.msg_id + left outer join collection_data.issues on issue_message_ref.issue_id = issues.issue_id where message.repo_id = :repo_id and msg_timestamp > :begin_date""") else: logger.info(f'Fetching all past messages of repo {repo_id}...') # Fetch all messages join_SQL = s.sql.text(""" - select message.msg_id, msg_timestamp, msg_text from augur_data.message - left outer join augur_data.pull_request_message_ref on message.msg_id = pull_request_message_ref.msg_id - left outer join augur_data.pull_requests on pull_request_message_ref.pull_request_id = pull_requests.pull_request_id + select message.msg_id, msg_timestamp, msg_text from collection_data.message + left outer join collection_data.pull_request_message_ref on message.msg_id = pull_request_message_ref.msg_id + left outer join collection_data.pull_requests on pull_request_message_ref.pull_request_id = pull_requests.pull_request_id where message.repo_id = :repo_id UNION - select message.msg_id, msg_timestamp, msg_text from augur_data.message - left outer join augur_data.issue_message_ref on message.msg_id = issue_message_ref.msg_id - left outer join 
augur_data.issues on issue_message_ref.issue_id = issues.issue_id + select message.msg_id, msg_timestamp, msg_text from collection_data.message + left outer join collection_data.issue_message_ref on message.msg_id = issue_message_ref.msg_id + left outer join collection_data.issues on issue_message_ref.issue_id = issues.issue_id where message.repo_id = :repo_id""") with engine.connect() as conn: @@ -147,14 +147,14 @@ def message_insight_model(repo_git: str,logger,engine) -> None: if not full_train: merge_SQL = s.sql.text(""" - select novelty_flag, reconstruction_error from augur_data.message_analysis - left outer join augur_data.pull_request_message_ref on message_analysis.msg_id = pull_request_message_ref.msg_id - left outer join augur_data.pull_requests on pull_request_message_ref.pull_request_id = pull_requests.pull_request_id + select novelty_flag, reconstruction_error from collection_data.message_analysis + left outer join collection_data.pull_request_message_ref on message_analysis.msg_id = pull_request_message_ref.msg_id + left outer join collection_data.pull_requests on pull_request_message_ref.pull_request_id = pull_requests.pull_request_id where pull_request_message_ref.repo_id = :repo_id UNION - select novelty_flag, reconstruction_error from augur_data.message_analysis - left outer join augur_data.issue_message_ref on message_analysis.msg_id = issue_message_ref.msg_id - left outer join augur_data.issues on issue_message_ref.issue_id = issues.issue_id + select novelty_flag, reconstruction_error from collection_data.message_analysis + left outer join collection_data.issue_message_ref on message_analysis.msg_id = issue_message_ref.msg_id + left outer join collection_data.issues on issue_message_ref.issue_id = issues.issue_id where issue_message_ref.repo_id = :repo_id""") with engine.connect() as conn: diff --git a/collectoss/tasks/data_analysis/pull_request_analysis_worker/tasks.py b/collectoss/tasks/data_analysis/pull_request_analysis_worker/tasks.py index 
aa8d5a0a0..2c6e4365e 100644 --- a/collectoss/tasks/data_analysis/pull_request_analysis_worker/tasks.py +++ b/collectoss/tasks/data_analysis/pull_request_analysis_worker/tasks.py @@ -59,8 +59,8 @@ def pull_request_analysis_model(repo_git: str,logger,engine) -> None: pull_request_commits.pr_cmt_id, pr_augur_contributor_id, pr_src_author_association - from augur_data.pull_requests - INNER JOIN augur_data.pull_request_commits on pull_requests.pull_request_id = pull_request_commits.pull_request_id + from collection_data.pull_requests + INNER JOIN collection_data.pull_request_commits on pull_requests.pull_request_id = pull_request_commits.pull_request_id where pr_created_at > :begin_date and pull_requests.repo_id = :repo_id and pr_src_state like 'open' @@ -90,13 +90,13 @@ def pull_request_analysis_model(repo_git: str,logger,engine) -> None: # Get sentiment score of all messages relating to the PR messages_SQL = s.sql.text(""" - select message.msg_id, msg_timestamp, msg_text, message.cntrb_id from augur_data.message - left outer join augur_data.pull_request_message_ref on message.msg_id = pull_request_message_ref.msg_id - left outer join augur_data.pull_requests on pull_request_message_ref.pull_request_id = pull_requests.pull_request_id where pull_request_message_ref.repo_id = :repo_id + select message.msg_id, msg_timestamp, msg_text, message.cntrb_id from collection_data.message + left outer join collection_data.pull_request_message_ref on message.msg_id = pull_request_message_ref.msg_id + left outer join collection_data.pull_requests on pull_request_message_ref.pull_request_id = pull_requests.pull_request_id where pull_request_message_ref.repo_id = :repo_id UNION - select message.msg_id, msg_timestamp, msg_text, message.cntrb_id from augur_data.message - left outer join augur_data.issue_message_ref on message.msg_id = issue_message_ref.msg_id - left outer join augur_data.issues on issue_message_ref.issue_id = issues.issue_id where issue_message_ref.repo_id = 
:repo_id""") + select message.msg_id, msg_timestamp, msg_text, message.cntrb_id from collection_data.message + left outer join collection_data.issue_message_ref on message.msg_id = issue_message_ref.msg_id + left outer join collection_data.issues on issue_message_ref.issue_id = issues.issue_id where issue_message_ref.repo_id = :repo_id""") with engine.connect() as conn: df_message = pd.read_sql_query(messages_SQL, conn, params={'repo_id': repo_id}) @@ -104,7 +104,7 @@ def pull_request_analysis_model(repo_git: str,logger,engine) -> None: # Map PR to its corresponding messages - pr_ref_sql = s.sql.text("select * from augur_data.pull_request_message_ref") + pr_ref_sql = s.sql.text("select * from collection_data.pull_request_message_ref") with engine.connect() as conn: df_pr_ref = pd.read_sql_query(pr_ref_sql, conn) df_merge = pd.merge(df_pr, df_pr_ref, on='pull_request_id', how='left') @@ -142,7 +142,7 @@ def pull_request_analysis_model(repo_git: str,logger,engine) -> None: ''' # Get cntrb info from API - cntrb_sql = 'SELECT cntrb_id, gh_login FROM augur_data.contributors' + cntrb_sql = 'SELECT cntrb_id, gh_login FROM collection_data.contributors' df_ctrb = pd.read_sql_query(cntrb_SQL, create_database_engine()) df_fin1 = pd.merge(df_fin,df_ctrb,left_on='pr_augur_contributor_id', right_on='cntrb_id', how='left') df_fin1 = df_fin1.drop(['cntrb_id'],axis=1) @@ -157,7 +157,7 @@ def pull_request_analysis_model(repo_git: str,logger,engine) -> None: # Get repo info repo_sql = s.sql.text(""" SELECT repo_id, pull_requests_merged, pull_request_count,watchers_count, last_updated FROM - augur_data.repo_info where repo_id = :repo_id + collection_data.repo_info where repo_id = :repo_id """) with engine.connect() as conn: diff --git a/collectoss/tasks/db/refresh_materialized_views.py b/collectoss/tasks/db/refresh_materialized_views.py index 95f169722..d8eeabf97 100644 --- a/collectoss/tasks/db/refresh_materialized_views.py +++ b/collectoss/tasks/db/refresh_materialized_views.py @@ 
-19,78 +19,78 @@ def refresh_materialized_views(self): #self.logger = logging.getLogger(refresh_materialized_views.__name__) mv1_refresh = s.sql.text(""" - REFRESH MATERIALIZED VIEW concurrently augur_data.api_get_all_repo_prs with data; + REFRESH MATERIALIZED VIEW concurrently collection_data.api_get_all_repo_prs with data; COMMIT; """) mv2_refresh = s.sql.text(""" - REFRESH MATERIALIZED VIEW concurrently augur_data.api_get_all_repos_commits with data; + REFRESH MATERIALIZED VIEW concurrently collection_data.api_get_all_repos_commits with data; COMMIT; """) mv3_refresh = s.sql.text(""" - REFRESH MATERIALIZED VIEW concurrently augur_data.api_get_all_repos_issues with data; + REFRESH MATERIALIZED VIEW concurrently collection_data.api_get_all_repos_issues with data; COMMIT; """) mv4_refresh = s.sql.text(""" - REFRESH MATERIALIZED VIEW concurrently augur_data.augur_new_contributors with data; + REFRESH MATERIALIZED VIEW concurrently collection_data.augur_new_contributors with data; COMMIT; """) mv5_refresh = s.sql.text(""" - REFRESH MATERIALIZED VIEW concurrently augur_data.explorer_commits_and_committers_daily_count with data; + REFRESH MATERIALIZED VIEW concurrently collection_data.explorer_commits_and_committers_daily_count with data; COMMIT; """) mv6_refresh = s.sql.text(""" - REFRESH MATERIALIZED VIEW concurrently augur_data.explorer_new_contributors with data; + REFRESH MATERIALIZED VIEW concurrently collection_data.explorer_new_contributors with data; COMMIT; """) mv7_refresh = s.sql.text(""" - REFRESH MATERIALIZED VIEW concurrently augur_data.explorer_entry_list with data; + REFRESH MATERIALIZED VIEW concurrently collection_data.explorer_entry_list with data; COMMIT; """) mv8_refresh = s.sql.text(""" - REFRESH MATERIALIZED VIEW concurrently augur_data.explorer_contributor_actions with data; + REFRESH MATERIALIZED VIEW concurrently collection_data.explorer_contributor_actions with data; COMMIT; """) mv9_refresh = s.sql.text(""" - REFRESH MATERIALIZED VIEW 
concurrently augur_data.explorer_user_repos with data; + REFRESH MATERIALIZED VIEW concurrently collection_data.explorer_user_repos with data; COMMIT; """) mv10_refresh = s.sql.text(""" - REFRESH MATERIALIZED VIEW concurrently augur_data.explorer_pr_response_times with data; + REFRESH MATERIALIZED VIEW concurrently collection_data.explorer_pr_response_times with data; COMMIT; """) mv11_refresh = s.sql.text(""" - REFRESH MATERIALIZED VIEW concurrently augur_data.explorer_pr_assignments with data; + REFRESH MATERIALIZED VIEW concurrently collection_data.explorer_pr_assignments with data; COMMIT; """) mv12_refresh = s.sql.text(""" - REFRESH MATERIALIZED VIEW concurrently augur_data.explorer_issue_assignments with data; + REFRESH MATERIALIZED VIEW concurrently collection_data.explorer_issue_assignments with data; COMMIT; """) mv13_refresh = s.sql.text(""" - REFRESH MATERIALIZED VIEW concurrently augur_data.explorer_pr_response with data; + REFRESH MATERIALIZED VIEW concurrently collection_data.explorer_pr_response with data; COMMIT; """) mv14_refresh = s.sql.text(""" - REFRESH MATERIALIZED VIEW concurrently augur_data.explorer_repo_languages with data; + REFRESH MATERIALIZED VIEW concurrently collection_data.explorer_repo_languages with data; COMMIT; """) diff --git a/collectoss/tasks/github/facade_github/tasks.py b/collectoss/tasks/github/facade_github/tasks.py index ab7a18eab..732b70dcc 100644 --- a/collectoss/tasks/github/facade_github/tasks.py +++ b/collectoss/tasks/github/facade_github/tasks.py @@ -207,12 +207,12 @@ def insert_facade_contributors(self, repo_git): commits.cmt_commit_hash AS hash, commits.cmt_author_raw_email AS email_raw FROM - augur_data.commits + collection_data.commits WHERE commits.repo_id = :repo_id AND commits.cmt_ght_author_id IS NULL AND commits.cmt_author_raw_email NOT IN ( - SELECT email FROM augur_data.unresolved_commit_emails + SELECT email FROM collection_data.unresolved_commit_emails ) """).bindparams(repo_id=repo_id) @@ -253,19 
+253,19 @@ def insert_facade_contributors(self, repo_git): resolve_email_to_cntrb_id_sql = s.sql.text(""" WITH email_to_contributor AS ( SELECT cntrb_email AS email, cntrb_id - FROM augur_data.contributors + FROM collection_data.contributors WHERE cntrb_email IS NOT NULL UNION ALL SELECT cntrb_canonical AS email, cntrb_id - FROM augur_data.contributors + FROM collection_data.contributors WHERE cntrb_canonical IS NOT NULL UNION ALL SELECT alias_email AS email, cntrb_id - FROM augur_data.contributors_aliases + FROM collection_data.contributors_aliases WHERE alias_email IS NOT NULL ), deduplicated AS ( @@ -277,7 +277,7 @@ def insert_facade_contributors(self, repo_git): d.cntrb_id, c.cmt_author_email AS email FROM - augur_data.commits c + collection_data.commits c INNER JOIN deduplicated d ON c.cmt_author_email = d.email diff --git a/collectoss/tasks/util/collection_util.py b/collectoss/tasks/util/collection_util.py index 9ca6bb059..3781377b0 100644 --- a/collectoss/tasks/util/collection_util.py +++ b/collectoss/tasks/util/collection_util.py @@ -74,7 +74,7 @@ def get_newly_added_repos(session, limit, hook): repo_query = s.sql.text(f""" select repo_git - from augur_operations.collection_status x, augur_data.repo y + from augur_operations.collection_status x, collection_data.repo y where x.repo_id=y.repo_id and {condition_string} order by {order_by_field} diff --git a/collectoss/util/repo_load_controller.py b/collectoss/util/repo_load_controller.py index 5455411e4..7e3a0548a 100644 --- a/collectoss/util/repo_load_controller.py +++ b/collectoss/util/repo_load_controller.py @@ -22,8 +22,8 @@ augur_data_schema = MetaData(schema = "augur_data") augur_data_schema.reflect(bind = engine, views = True) - commits_materialized_view: Table = augur_data_schema.tables["augur_data.api_get_all_repos_commits"] - issues_materialized_view: Table = augur_data_schema.tables["augur_data.api_get_all_repos_issues"] + commits_materialized_view: Table = 
augur_data_schema.tables["collection_data.api_get_all_repos_commits"] + issues_materialized_view: Table = augur_data_schema.tables["collection_data.api_get_all_repos_issues"] class RepoLoadController: diff --git a/conftest.py b/conftest.py index db2e95b78..38f7e7055 100644 --- a/conftest.py +++ b/conftest.py @@ -195,7 +195,7 @@ def read_only_db(empty_db): database_name = empty_db.url.database test_username = "testuser" test_password = "testpass" - schemas = ["public", "augur_data", "augur_operations"] + schemas = ["public", "collection_data", "augur_operations"] # create read-only user empty_db.execute(s.text(f"CREATE USER testuser WITH PASSWORD '{test_password}';")) diff --git a/tests/test_application/test_db/test_session.py b/tests/test_application/test_db/test_session.py index 36698b217..856a3f194 100644 --- a/tests/test_application/test_db/test_session.py +++ b/tests/test_application/test_db/test_session.py @@ -35,7 +35,7 @@ def test_execute_sql(test_db_engine): with DatabaseSession(logger, engine=test_db_engine) as session: cntrb_id = data['cntrb_id'] - result = session.execute_sql(f"SELECT * FROM augur_data.contributors WHERE cntrb_id='{cntrb_id}'").fetchall() + result = session.execute_sql(f"SELECT * FROM collection_data.contributors WHERE cntrb_id='{cntrb_id}'").fetchall() assert result is not None assert isinstance(result[0], s.engine.result.RowProxy) @@ -57,7 +57,7 @@ def test_execute_sql(test_db_engine): for data in all_data: cntrb_id = data["cntrb_id"] - connection.execute(f"DELETE FROM augur_data.contributors WHERE cntrb_id='{cntrb_id}';") + connection.execute(f"DELETE FROM collection_data.contributors WHERE cntrb_id='{cntrb_id}';") def test_insert_data_with_duplicates(test_db_engine): @@ -79,7 +79,7 @@ def test_insert_data_with_duplicates(test_db_engine): cntrb_id = data_1['cntrb_id'] - result = session.execute_sql(f"SELECT * FROM augur_data.contributors WHERE cntrb_id!='{not_provided_cntrb_id}' AND cntrb_id!='{nan_cntrb_id}'").fetchall() + result = 
session.execute_sql(f"SELECT * FROM collection_data.contributors WHERE cntrb_id!='{not_provided_cntrb_id}' AND cntrb_id!='{nan_cntrb_id}'").fetchall() assert result is not None assert len(result) == 3 @@ -94,7 +94,7 @@ def test_insert_data_with_duplicates(test_db_engine): for data in duplicate_data_list: cntrb_id = data["cntrb_id"] - connection.execute(f"DELETE FROM augur_data.contributors WHERE cntrb_id='{cntrb_id}';") + connection.execute(f"DELETE FROM collection_data.contributors WHERE cntrb_id='{cntrb_id}';") def test_insert_data_with_updates(test_db_engine): @@ -117,7 +117,7 @@ def test_insert_data_with_updates(test_db_engine): with test_db_engine.connect() as connection: cntrb_id = data_1['cntrb_id'] - result = connection.execute(f"SELECT * FROM augur_data.contributors WHERE cntrb_id='{cntrb_id}'").fetchall() + result = connection.execute(f"SELECT * FROM collection_data.contributors WHERE cntrb_id='{cntrb_id}'").fetchall() assert result is not None assert dict(result[0])["gh_user_id"] == 6 @@ -127,7 +127,7 @@ def test_insert_data_with_updates(test_db_engine): with test_db_engine.connect() as connection: cntrb_id = data_1["cntrb_id"] - connection.execute(f"DELETE FROM augur_data.contributors WHERE cntrb_id='{cntrb_id}';") + connection.execute(f"DELETE FROM collection_data.contributors WHERE cntrb_id='{cntrb_id}';") def test_insert_data_with_bulk(test_db_engine): @@ -145,7 +145,7 @@ def test_insert_data_with_bulk(test_db_engine): with test_db_engine.connect() as connection: - result = connection.execute(f"SELECT * FROM augur_data.contributors WHERE cntrb_id!='{not_provided_cntrb_id}' AND cntrb_id!='{nan_cntrb_id}'").fetchall() + result = connection.execute(f"SELECT * FROM collection_data.contributors WHERE cntrb_id!='{not_provided_cntrb_id}' AND cntrb_id!='{nan_cntrb_id}'").fetchall() assert result is not None assert len(result) == 4 @@ -160,7 +160,7 @@ def test_insert_data_with_bulk(test_db_engine): with test_db_engine.connect() as connection: cntrb_id = 
data_1["cntrb_id"] - connection.execute(f"DELETE FROM augur_data.contributors WHERE cntrb_id!='{not_provided_cntrb_id}' AND cntrb_id!='{nan_cntrb_id}';") + connection.execute(f"DELETE FROM collection_data.contributors WHERE cntrb_id!='{not_provided_cntrb_id}' AND cntrb_id!='{nan_cntrb_id}';") @@ -183,7 +183,7 @@ def test_insert_data_partial_update(test_db_engine): with test_db_engine.connect() as connection: cntrb_id = data_1['cntrb_id'] - result = connection.execute(f"SELECT * FROM augur_data.contributors WHERE cntrb_id='{cntrb_id}'").fetchall() + result = connection.execute(f"SELECT * FROM collection_data.contributors WHERE cntrb_id='{cntrb_id}'").fetchall() assert result is not None assert dict(result[0])["gh_user_id"] == 6 @@ -193,7 +193,7 @@ def test_insert_data_partial_update(test_db_engine): with test_db_engine.connect() as connection: cntrb_id = data_1["cntrb_id"] - connection.execute(f"DELETE FROM augur_data.contributors WHERE cntrb_id='{cntrb_id}';") + connection.execute(f"DELETE FROM collection_data.contributors WHERE cntrb_id='{cntrb_id}';") issue_data_with_null_strings = [] @@ -232,7 +232,7 @@ def test_insert_issue_data_with_invalid_strings(test_db_engine): return_columns=issue_return_columns, string_fields=issue_string_columns) data_inserted_count = len(issue_data_with_null_strings) - result = connection.execute(f"Select * FROM augur_data.issues;").fetchall() + result = connection.execute(f"Select * FROM collection_data.issues;").fetchall() assert issue_return_data is not None assert len(issue_return_data) == data_inserted_count @@ -242,7 +242,7 @@ def test_insert_issue_data_with_invalid_strings(test_db_engine): with test_db_engine.connect() as connection: connection.execute(""" - DELETE FROM augur_data.issues; + DELETE FROM collection_data.issues; DELETE FROM "augur_data"."repo"; DELETE FROM "augur_data"."repo_groups"; """) diff --git a/tests/test_tasks/test_github_tasks/test_pull_requests.py b/tests/test_tasks/test_github_tasks/test_pull_requests.py 
index 4dc2c9e73..83751ea22 100644 --- a/tests/test_tasks/test_github_tasks/test_pull_requests.py +++ b/tests/test_tasks/test_github_tasks/test_pull_requests.py @@ -263,7 +263,7 @@ def test_insert_pr_contributors(github_api_key_headers, test_db_session, pr_numb with test_db_session.engine.connect() as connection: - result = connection.execute(f"SELECT * FROM augur_data.contributors WHERE cntrb_id!='{not_provided_cntrb_id}' AND cntrb_id!='{nan_cntrb_id}'").fetchall() + result = connection.execute(f"SELECT * FROM collection_data.contributors WHERE cntrb_id!='{not_provided_cntrb_id}' AND cntrb_id!='{nan_cntrb_id}'").fetchall() assert result is not None assert len(result) == len(unique_contributors) @@ -277,7 +277,7 @@ def test_insert_pr_contributors(github_api_key_headers, test_db_session, pr_numb with test_db_session.engine.connect() as connection: - connection.execute(f"DELETE FROM augur_data.contributors WHERE cntrb_id!='{not_provided_cntrb_id}' AND cntrb_id!='{nan_cntrb_id}';") + connection.execute(f"DELETE FROM collection_data.contributors WHERE cntrb_id!='{not_provided_cntrb_id}' AND cntrb_id!='{nan_cntrb_id}';") repos = [] repos.append({"owner": "chaoss", "repo": "augur"}) @@ -336,7 +336,7 @@ def test_insert_prs(github_api_key_headers, test_db_session, repo): with test_db_session.engine.connect() as connection: - result = connection.execute(f"SELECT * FROM augur_data.pull_requests;").fetchall() + result = connection.execute(f"SELECT * FROM collection_data.pull_requests;").fetchall() assert result is not None assert len(result) == len(prs) == len(return_data) @@ -353,11 +353,11 @@ def test_insert_prs(github_api_key_headers, test_db_session, repo): with test_db_session.engine.connect() as connection: - connection.execute(f"DELETE FROM augur_data.pull_requests;") + connection.execute(f"DELETE FROM collection_data.pull_requests;") connection.execute("""DELETE FROM "augur_data"."repo"; DELETE FROM "augur_data"."repo_groups"; """) - connection.execute(f"DELETE FROM 
augur_data.contributors WHERE cntrb_id!='{not_provided_cntrb_id}' AND cntrb_id!='{nan_cntrb_id}';") + connection.execute(f"DELETE FROM collection_data.contributors WHERE cntrb_id!='{not_provided_cntrb_id}' AND cntrb_id!='{nan_cntrb_id}';") diff --git a/tests/test_workers/test_facade/test_facade_contributor_interface/test_endpoints.py b/tests/test_workers/test_facade/test_facade_contributor_interface/test_endpoints.py index 980f09fb3..14bd8bfe5 100644 --- a/tests/test_workers/test_facade/test_facade_contributor_interface/test_endpoints.py +++ b/tests/test_workers/test_facade/test_facade_contributor_interface/test_endpoints.py @@ -9,7 +9,7 @@ @pytest.fixture def set_up_repo_groups(database_connection): - df = pd.read_sql(s.sql.text("SELECT repo_group_id FROM augur_data.repo_groups"), database_connection) + df = pd.read_sql(s.sql.text("SELECT repo_group_id FROM collection_data.repo_groups"), database_connection) repo_group_IDs = df['repo_group_id'].values.tolist() insert_repo_group_sql = s.sql.text(""" @@ -31,12 +31,12 @@ def set_up_repo_groups(database_connection): - df = database_connection.execute(s.sql.text("SELECT repo_group_id FROM augur_data.repo_groups")) + df = database_connection.execute(s.sql.text("SELECT repo_group_id FROM collection_data.repo_groups")) repo_group_IDs = [group[0] for group in df.fetchall()] insertSQL = s.sql.text(""" - INSERT INTO augur_data.repo(repo_group_id, repo_git, + INSERT INTO collection_data.repo(repo_group_id, repo_git, tool_source, tool_version, data_source, data_collection_date) VALUES (:repo_group_id, :repo_git, 'CLI', 1.0, 'Git', CURRENT_TIMESTAMP) """) From cf6b6510d516d0810eadbf3d1bcca3a177aff6c3 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 27 May 2026 15:44:29 -0400 Subject: [PATCH 07/26] rename augur_operations file to operations Signed-off-by: Adrian Edwards --- collectoss/application/db/models/__init__.py | 2 +- .../db/models/{augur_operations.py => operations.py} | 0 2 files changed, 1 insertion(+), 1 
deletion(-) rename collectoss/application/db/models/{augur_operations.py => operations.py} (100%) diff --git a/collectoss/application/db/models/__init__.py b/collectoss/application/db/models/__init__.py index 11c6b38fe..80d3cf9b4 100644 --- a/collectoss/application/db/models/__init__.py +++ b/collectoss/application/db/models/__init__.py @@ -95,7 +95,7 @@ SpdxIdentifier, ) -from collectoss.application.db.models.augur_operations import ( +from collectoss.application.db.models.operations import ( Settings, WorkerHistory, WorkerJob, diff --git a/collectoss/application/db/models/augur_operations.py b/collectoss/application/db/models/operations.py similarity index 100% rename from collectoss/application/db/models/augur_operations.py rename to collectoss/application/db/models/operations.py From 5ddd11552a338f811494a33b23bb7c730c12ca5a Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 27 May 2026 15:48:14 -0400 Subject: [PATCH 08/26] rename operations schema specifiers in tables and columns Signed-off-by: Adrian Edwards --- .../application/db/models/operations.py | 44 +++++++++---------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/collectoss/application/db/models/operations.py b/collectoss/application/db/models/operations.py index 68aaac06d..34c99fe0e 100644 --- a/collectoss/application/db/models/operations.py +++ b/collectoss/application/db/models/operations.py @@ -83,20 +83,20 @@ def retrieve_owner_repos(session, owner: str) -> List[str]: Column("Count", String), Column("WeightedComplexity", String), Column("Files", String), - schema="augur_operations", + schema="collection_operations", ) class Settings(Base): __tablename__ = "augur_settings" __table_args__ = { - "schema": "augur_operations", + "schema": "collection_operations", "comment": "CollectOSS settings include the schema version, and the CollectOSS API Key as of 10/25/2020. Future augur settings may be stored in this table, which has the basic structure of a name-value pair. 
", } id = Column( BigInteger, - Sequence("augur_settings_id_seq", start=1, schema="augur_operations"), + Sequence("augur_settings_id_seq", start=1, schema="collection_operations"), primary_key=True, server_default=text( "nextval('augur_operations.augur_settings_id_seq'::regclass)" @@ -119,20 +119,20 @@ class Settings(Base): server_default=text("CURRENT_TIMESTAMP"), ), Index("repos_id,statusops", "repos_id", "status"), - schema="augur_operations", + schema="collection_operations", comment="For future use when we move all working tables to the augur_operations schema. ", ) class WorkerHistory(Base): __tablename__ = "worker_history" __table_args__ = { - "schema": "augur_operations", + "schema": "collection_operations", "comment": "This table stores the complete history of job execution, including success and failure. It is useful for troubleshooting. ", } history_id = Column( BigInteger, - Sequence("gh_worker_history_history_id_seq", start=1, schema="augur_operations"), + Sequence("gh_worker_history_history_id_seq", start=1, schema="collection_operations"), primary_key=True, server_default=text( "nextval('augur_operations.gh_worker_history_history_id_seq'::regclass)" @@ -150,7 +150,7 @@ class WorkerHistory(Base): class WorkerJob(Base): __tablename__ = "worker_job" __table_args__ = { - "schema": "augur_operations", + "schema": "collection_operations", "comment": "This table stores the jobs workers collect data for. A job is found in the code, and in the augur.config.json under the construct of a “model”. ", } @@ -172,13 +172,13 @@ class WorkerJob(Base): class WorkerOauth(Base): __tablename__ = "worker_oauth" __table_args__ = { - "schema": "augur_operations", + "schema": "collection_operations", "comment": "This table stores credentials for retrieving data from platform API’s. Entries in this table must comply with the terms of service for each platform. 
", } oauth_id = Column( BigInteger, - Sequence("worker_oauth_oauth_id_seq", start=1000, schema="augur_operations"), + Sequence("worker_oauth_oauth_id_seq", start=1000, schema="collection_operations"), primary_key=True, server_default=text( "nextval('augur_operations.worker_oauth_oauth_id_seq'::regclass)" @@ -196,7 +196,7 @@ class WorkerOauth(Base): class WorkerSettingsFacade(Base): __tablename__ = "worker_settings_facade" __table_args__ = { - "schema": "augur_operations", + "schema": "collection_operations", "comment": "For future use when we move all working tables to the augur_operations schema. ", } @@ -215,7 +215,7 @@ class WorkerSettingsFacade(Base): Column( "working_commit", String(40), server_default=text("'NULL'::character varying") ), - schema="augur_operations", + schema="collection_operations", comment="For future use when we move all working tables to the augur_operations schema. ", ) @@ -237,7 +237,7 @@ class Config(Base): __tablename__ = 'config' __table_args__ = ( UniqueConstraint('section_name', "setting_name", name='unique-config-setting'), - {"schema": "augur_operations"} + {"schema": "collection_operations"} ) id = Column(SmallInteger, primary_key=True, nullable=False) @@ -255,7 +255,7 @@ class User(Base): UniqueConstraint('email', name='user-unique-email'), UniqueConstraint('login_name', name='user-unique-name'), UniqueConstraint('text_phone', name='user-unique-phone'), - {"schema": "augur_operations"} + {"schema": "collection_operations"} ) user_id = Column(Integer, primary_key=True) @@ -634,7 +634,7 @@ class UserGroup(Base): __tablename__ = 'user_groups' __table_args__ = ( UniqueConstraint('user_id', 'name', name='user_groups_user_id_name_key'), - {"schema": "augur_operations"} + {"schema": "collection_operations"} ) group_id = Column(BigInteger, primary_key=True) @@ -743,7 +743,7 @@ def convert_group_name_to_id(session, user_id: int, group_name: str) -> int: class UserRepo(Base): __tablename__ = "user_repos" - __table_args__ = { "schema": 
"augur_operations" } + __table_args__ = { "schema": "collection_operations" } group_id = Column( ForeignKey("augur_operations.user_groups.group_id", name="user_repo_group_id_fkey"), primary_key=True, nullable=False @@ -1010,7 +1010,7 @@ def add_github_org_repos(session, url: List[str], user_id: int, group_name: int) class UserSessionToken(Base): __tablename__ = "user_session_tokens" - __table_args__ = { "schema": "augur_operations" } + __table_args__ = { "schema": "collection_operations" } token = Column(String, primary_key=True, nullable=False) user_id = Column(ForeignKey("augur_operations.users.user_id", name="user_session_token_user_id_fkey"), nullable=False) @@ -1048,7 +1048,7 @@ def delete_refresh_tokens(self, session): class ClientApplication(Base): __tablename__ = "client_applications" - __table_args__ = { "schema": "augur_operations" } + __table_args__ = { "schema": "collection_operations" } id = Column(String, primary_key=True, nullable=False) user_id = Column(ForeignKey("augur_operations.users.user_id", name="client_application_user_id_fkey"), nullable=False) @@ -1074,7 +1074,7 @@ def get_by_id(session, client_id): class ForgeInstance(Base): __tablename__ = "forge_instance" - __table_args__ = { "schema": "augur_operations" } + __table_args__ = { "schema": "collection_operations" } id = Column(Integer, primary_key=True, nullable=False, comment="Internal unique identifier for this forge instance") # platform_type stores an integer that CollectOSS maps/will map to it's internal platform identifier Enum @@ -1089,7 +1089,7 @@ class ForgeInstance(Base): class Subscription(Base): __tablename__ = "subscriptions" - __table_args__ = { "schema": "augur_operations" } + __table_args__ = { "schema": "collection_operations" } application_id = Column(ForeignKey("augur_operations.client_applications.id", name="subscriptions_application_id_fkey"), primary_key=True) type_id = Column(ForeignKey("augur_operations.subscription_types.id", name="subscriptions_type_id_fkey"), 
primary_key=True) @@ -1101,7 +1101,7 @@ class SubscriptionType(Base): __tablename__ = "subscription_types" __table_args__ = ( UniqueConstraint('name', name='subscription_type_title_unique'), - {"schema": "augur_operations"} + {"schema": "collection_operations"} ) @@ -1115,7 +1115,7 @@ class RefreshToken(Base): __tablename__ = "refresh_tokens" __table_args__ = ( UniqueConstraint('user_session_token', name='refresh_token_user_session_token_id_unique'), - {"schema": "augur_operations"} + {"schema": "collection_operations"} ) id = Column(String, primary_key=True) @@ -1201,7 +1201,7 @@ class CollectionStatus(Base): "NOT (core_status = 'Pending' AND secondary_status = 'Collecting')", name='core_secondary_dependency_check' ), - {"schema": "augur_operations"} + {"schema": "collection_operations"} ) repo_id = Column(ForeignKey("collection_data.repo.repo_id", name="collection_status_repo_id_fk"), primary_key=True) From 56627b6bd7c046897d2aaa9f72b9dfcdd5c92e6d Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 27 May 2026 15:48:29 -0400 Subject: [PATCH 09/26] rename in-SQL references to operations schema within the models Signed-off-by: Adrian Edwards --- .../application/db/models/operations.py | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/collectoss/application/db/models/operations.py b/collectoss/application/db/models/operations.py index 34c99fe0e..417230a50 100644 --- a/collectoss/application/db/models/operations.py +++ b/collectoss/application/db/models/operations.py @@ -99,7 +99,7 @@ class Settings(Base): Sequence("augur_settings_id_seq", start=1, schema="collection_operations"), primary_key=True, server_default=text( - "nextval('augur_operations.augur_settings_id_seq'::regclass)" + "nextval('collection_operations.augur_settings_id_seq'::regclass)" ), ) setting = Column(String) @@ -135,7 +135,7 @@ class WorkerHistory(Base): Sequence("gh_worker_history_history_id_seq", start=1, schema="collection_operations"), 
primary_key=True, server_default=text( - "nextval('augur_operations.gh_worker_history_history_id_seq'::regclass)" + "nextval('collection_operations.gh_worker_history_history_id_seq'::regclass)" ), ) repo_id = Column(BigInteger) @@ -181,7 +181,7 @@ class WorkerOauth(Base): Sequence("worker_oauth_oauth_id_seq", start=1000, schema="collection_operations"), primary_key=True, server_default=text( - "nextval('augur_operations.worker_oauth_oauth_id_seq'::regclass)" + "nextval('collection_operations.worker_oauth_oauth_id_seq'::regclass)" ), ) name = Column(String(255), nullable=False) @@ -639,7 +639,7 @@ class UserGroup(Base): group_id = Column(BigInteger, primary_key=True) user_id = Column(Integer, - ForeignKey("augur_operations.users.user_id", name="user_group_user_id_fkey"), nullable=False + ForeignKey("collection_operations.users.user_id", name="user_group_user_id_fkey"), nullable=False ) name = Column(String, nullable=False) favorited = Column(Boolean, nullable=False, server_default=text("FALSE")) @@ -746,7 +746,7 @@ class UserRepo(Base): __table_args__ = { "schema": "collection_operations" } group_id = Column( - ForeignKey("augur_operations.user_groups.group_id", name="user_repo_group_id_fkey"), primary_key=True, nullable=False + ForeignKey("collection_operations.user_groups.group_id", name="user_repo_group_id_fkey"), primary_key=True, nullable=False ) repo_id = Column( ForeignKey("collection_data.repo.repo_id", name="user_repo_user_id_fkey"), primary_key=True, nullable=False @@ -1013,9 +1013,9 @@ class UserSessionToken(Base): __table_args__ = { "schema": "collection_operations" } token = Column(String, primary_key=True, nullable=False) - user_id = Column(ForeignKey("augur_operations.users.user_id", name="user_session_token_user_id_fkey"), nullable=False) + user_id = Column(ForeignKey("collection_operations.users.user_id", name="user_session_token_user_id_fkey"), nullable=False) expiration = Column(BigInteger) - application_id = 
Column(ForeignKey("augur_operations.client_applications.id", name="user_session_token_application_id_fkey")) + application_id = Column(ForeignKey("collection_operations.client_applications.id", name="user_session_token_application_id_fkey")) created_at = Column(BigInteger) user = relationship("User", back_populates="tokens") @@ -1051,7 +1051,7 @@ class ClientApplication(Base): __table_args__ = { "schema": "collection_operations" } id = Column(String, primary_key=True, nullable=False) - user_id = Column(ForeignKey("augur_operations.users.user_id", name="client_application_user_id_fkey"), nullable=False) + user_id = Column(ForeignKey("collection_operations.users.user_id", name="client_application_user_id_fkey"), nullable=False) name = Column(String, nullable=False) redirect_url = Column(String, nullable=False) api_key = Column(String, nullable=False) @@ -1091,8 +1091,8 @@ class Subscription(Base): __tablename__ = "subscriptions" __table_args__ = { "schema": "collection_operations" } - application_id = Column(ForeignKey("augur_operations.client_applications.id", name="subscriptions_application_id_fkey"), primary_key=True) - type_id = Column(ForeignKey("augur_operations.subscription_types.id", name="subscriptions_type_id_fkey"), primary_key=True) + application_id = Column(ForeignKey("collection_operations.client_applications.id", name="subscriptions_application_id_fkey"), primary_key=True) + type_id = Column(ForeignKey("collection_operations.subscription_types.id", name="subscriptions_type_id_fkey"), primary_key=True) application = relationship("ClientApplication", back_populates="subscriptions") type = relationship("SubscriptionType", back_populates="subscriptions") @@ -1119,7 +1119,7 @@ class RefreshToken(Base): ) id = Column(String, primary_key=True) - user_session_token = Column(ForeignKey("augur_operations.user_session_tokens.token", name="refresh_token_session_token_id_fkey"), nullable=False) + user_session_token = 
Column(ForeignKey("collection_operations.user_session_tokens.token", name="refresh_token_session_token_id_fkey"), nullable=False) user_session = relationship("UserSessionToken", back_populates="refresh_tokens") From 2fc2d6e1807775478bf3fe410241095aca4f4d9c Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 27 May 2026 15:50:16 -0400 Subject: [PATCH 10/26] rename all references to augur_operations in the rest of the codebase Signed-off-by: Adrian Edwards --- collectoss/application/cli/backend.py | 8 ++--- collectoss/application/cli/collection.py | 22 ++++++------ collectoss/application/cli/db.py | 8 ++--- collectoss/application/service_manager.py | 14 ++++---- collectoss/tasks/start_tasks.py | 2 +- collectoss/tasks/util/collection_util.py | 4 +-- .../configuration-file-reference.rst | 2 +- .../development-guide/tech-breakdown.rst | 2 +- keyman/README.md | 2 +- .../test_config/test_config.py | 34 +++++++++---------- .../test_github_api_key_handler.py | 6 ++-- 11 files changed, 52 insertions(+), 52 deletions(-) diff --git a/collectoss/application/cli/backend.py b/collectoss/application/cli/backend.py index f85a4e105..378a8a0fa 100644 --- a/collectoss/application/cli/backend.py +++ b/collectoss/application/cli/backend.py @@ -340,7 +340,7 @@ def stop_processes(signal, logger, engine): def assign_orphan_repos_to_default_user(session): query = s.sql.text(""" - SELECT repo_id FROM repo WHERE repo_id NOT IN (SELECT repo_id FROM augur_operations.user_repos) + SELECT repo_id FROM repo WHERE repo_id NOT IN (SELECT repo_id FROM collection_operations.user_repos) """) repos = session.execute_sql(query).fetchall() @@ -377,13 +377,13 @@ def repo_reset(backend_app): Refresh repo collection to force data collection """ backend_app.database.execute(s.sql.text(""" - UPDATE augur_operations.collection_status + UPDATE collection_operations.collection_status SET core_status='Pending',core_task_id = NULL, core_data_last_collected = NULL; - UPDATE augur_operations.collection_status + 
UPDATE collection_operations.collection_status SET secondary_status='Pending',secondary_task_id = NULL, secondary_data_last_collected = NULL; - UPDATE augur_operations.collection_status + UPDATE collection_operations.collection_status SET facade_status='Pending', facade_task_id=NULL, facade_data_last_collected = NULL; TRUNCATE collection_data.commits CASCADE; diff --git a/collectoss/application/cli/collection.py b/collectoss/application/cli/collection.py index 3f46d10d1..471606d62 100644 --- a/collectoss/application/cli/collection.py +++ b/collectoss/application/cli/collection.py @@ -202,13 +202,13 @@ def repo_reset(ctx): """ with ctx.obj.engine.connect() as connection: connection.execute(s.sql.text(""" - UPDATE augur_operations.collection_status + UPDATE collection_operations.collection_status SET core_status='Pending',core_task_id = NULL, core_data_last_collected = NULL; - UPDATE augur_operations.collection_status + UPDATE collection_operations.collection_status SET secondary_status='Pending',secondary_task_id = NULL, secondary_data_last_collected = NULL; - UPDATE augur_operations.collection_status + UPDATE collection_operations.collection_status SET facade_status='Pending', facade_task_id=NULL, facade_data_last_collected = NULL; TRUNCATE collection_data.commits CASCADE; @@ -279,31 +279,31 @@ def cleanup_after_collection_halt(logger_instance, engine): #Make sure that database reflects collection status when processes are killed/stopped. 
def clean_collection_status(session): session.execute_sql(s.sql.text(""" - UPDATE augur_operations.collection_status + UPDATE collection_operations.collection_status SET core_status='Pending',core_task_id = NULL WHERE core_status='Collecting' AND core_data_last_collected IS NULL; - UPDATE augur_operations.collection_status + UPDATE collection_operations.collection_status SET core_status='Success',core_task_id = NULL WHERE core_status='Collecting' AND core_data_last_collected IS NOT NULL; - UPDATE augur_operations.collection_status + UPDATE collection_operations.collection_status SET secondary_status='Pending',secondary_task_id = NULL WHERE secondary_status='Collecting' AND secondary_data_last_collected IS NULL; - UPDATE augur_operations.collection_status + UPDATE collection_operations.collection_status SET secondary_status='Success',secondary_task_id = NULL WHERE secondary_status='Collecting' AND secondary_data_last_collected IS NOT NULL; - UPDATE augur_operations.collection_status + UPDATE collection_operations.collection_status SET facade_status='Update', facade_task_id=NULL WHERE facade_status LIKE '%Collecting%' and facade_data_last_collected IS NULL; - UPDATE augur_operations.collection_status + UPDATE collection_operations.collection_status SET facade_status='Success', facade_task_id=NULL WHERE facade_status LIKE '%Collecting%' and facade_data_last_collected IS NOT NULL; - UPDATE augur_operations.collection_status + UPDATE collection_operations.collection_status SET facade_status='Pending', facade_task_id=NULL WHERE facade_status='Failed Clone' OR facade_status='Initializing'; """)) @@ -311,7 +311,7 @@ def clean_collection_status(session): def assign_orphan_repos_to_default_user(session): query = s.sql.text(""" - SELECT repo_id FROM repo WHERE repo_id NOT IN (SELECT repo_id FROM augur_operations.user_repos) + SELECT repo_id FROM repo WHERE repo_id NOT IN (SELECT repo_id FROM collection_operations.user_repos) """) repos = session.execute_sql(query).fetchall() 
diff --git a/collectoss/application/cli/db.py b/collectoss/application/cli/db.py index 6bc475711..10e830735 100644 --- a/collectoss/application/cli/db.py +++ b/collectoss/application/cli/db.py @@ -262,7 +262,7 @@ def add_github_org(ctx, organization_name): def get_db_version(engine): db_version_sql = s.sql.text( """ - SELECT * FROM augur_operations.augur_settings WHERE setting = 'augur_data_version' + SELECT * FROM collection_operations.augur_settings WHERE setting = 'augur_data_version' """ ) @@ -342,11 +342,11 @@ def update_api_key(ctx, api_key): """ update_api_key_sql = s.sql.text( """ - INSERT INTO augur_operations.augur_settings (setting,VALUE) VALUES ('augur_api_key','HudMhTyPW7wiaWopUKgRoGCxlIUulw4g') ON CONFLICT (setting) + INSERT INTO collection_operations.augur_settings (setting,VALUE) VALUES ('augur_api_key','HudMhTyPW7wiaWopUKgRoGCxlIUulw4g') ON CONFLICT (setting) DO UPDATE SET VALUE='HudMhTyPW7wiaWopUKgRoGCxlIUulw4g'; - --UPDATE augur_operations.augur_settings SET VALUE = :api_key WHERE setting='augur_api_key'; + --UPDATE collection_operations.augur_settings SET VALUE = :api_key WHERE setting='augur_api_key'; """ ) @@ -363,7 +363,7 @@ def update_api_key(ctx, api_key): def get_api_key(ctx): get_api_key_sql = s.sql.text( """ - SELECT value FROM augur_operations.augur_settings WHERE setting='augur_api_key'; + SELECT value FROM collection_operations.augur_settings WHERE setting='augur_api_key'; """ ) diff --git a/collectoss/application/service_manager.py b/collectoss/application/service_manager.py index 3cebb4d34..0ade300e8 100644 --- a/collectoss/application/service_manager.py +++ b/collectoss/application/service_manager.py @@ -110,31 +110,31 @@ def clear_redis_caches(): #Make sure that database reflects collection status when processes are killed/stopped. 
def clean_collection_status(session): session.execute_sql(s.sql.text(""" - UPDATE augur_operations.collection_status + UPDATE collection_operations.collection_status SET core_status='Pending',core_task_id = NULL WHERE core_status='Collecting' AND core_data_last_collected IS NULL; - UPDATE augur_operations.collection_status + UPDATE collection_operations.collection_status SET core_status='Success',core_task_id = NULL WHERE core_status='Collecting' AND core_data_last_collected IS NOT NULL; - UPDATE augur_operations.collection_status + UPDATE collection_operations.collection_status SET secondary_status='Pending',secondary_task_id = NULL WHERE secondary_status='Collecting' AND secondary_data_last_collected IS NULL; - UPDATE augur_operations.collection_status + UPDATE collection_operations.collection_status SET secondary_status='Success',secondary_task_id = NULL WHERE secondary_status='Collecting' AND secondary_data_last_collected IS NOT NULL; - UPDATE augur_operations.collection_status + UPDATE collection_operations.collection_status SET facade_status='Update', facade_task_id=NULL WHERE facade_status LIKE '%Collecting%' and facade_data_last_collected IS NULL; - UPDATE augur_operations.collection_status + UPDATE collection_operations.collection_status SET facade_status='Success', facade_task_id=NULL WHERE facade_status LIKE '%Collecting%' and facade_data_last_collected IS NOT NULL; - UPDATE augur_operations.collection_status + UPDATE collection_operations.collection_status SET facade_status='Pending', facade_task_id=NULL WHERE facade_status='Failed Clone' OR facade_status='Initializing'; """)) diff --git a/collectoss/tasks/start_tasks.py b/collectoss/tasks/start_tasks.py index 8e130f926..42ebed21e 100644 --- a/collectoss/tasks/start_tasks.py +++ b/collectoss/tasks/start_tasks.py @@ -383,7 +383,7 @@ def create_collection_status_records(self): logger = logging.getLogger(create_collection_status_records.__name__) query = s.sql.text(""" - SELECT repo_id FROM repo WHERE 
repo_id NOT IN (SELECT repo_id FROM augur_operations.collection_status) + SELECT repo_id FROM repo WHERE repo_id NOT IN (SELECT repo_id FROM collection_operations.collection_status) """) repo = execute_sql(query).first() diff --git a/collectoss/tasks/util/collection_util.py b/collectoss/tasks/util/collection_util.py index 3781377b0..59dd22e22 100644 --- a/collectoss/tasks/util/collection_util.py +++ b/collectoss/tasks/util/collection_util.py @@ -74,7 +74,7 @@ def get_newly_added_repos(session, limit, hook): repo_query = s.sql.text(f""" select repo_git - from augur_operations.collection_status x, collection_data.repo y + from collection_operations.collection_status x, collection_data.repo y where x.repo_id=y.repo_id and {condition_string} order by {order_by_field} @@ -96,7 +96,7 @@ def get_repos_for_recollection(session, limit, hook, days_until_collect_again): repo_query = s.sql.text(f""" select repo_git - from augur_operations.collection_status x, repo y + from collection_operations.collection_status x, repo y where x.repo_id = y.repo_id and {condition_string} and {hook}_data_last_collected <= NOW() - INTERVAL '{days_until_collect_again} DAYS' diff --git a/docs/source/development-guide/configuration-file-reference.rst b/docs/source/development-guide/configuration-file-reference.rst index ecca79590..a2da864cd 100644 --- a/docs/source/development-guide/configuration-file-reference.rst +++ b/docs/source/development-guide/configuration-file-reference.rst @@ -1,7 +1,7 @@ Configuration file reference =============================== -CollectOSS's configuration template file, which generates your locally deployed ``augur.config.json`` file, is found at ``collectoss/config.py``. You will notice a small collection of workers are turned on to start with, by examining the ``switch`` variable within the ``Workers`` block of the config file. You can also specify the number of processes to spawn for each worker using the ``workers`` command. 
The default is one, and we recommend you start here. If you are going to spawn multiple workers, be sure you have enough credentials cached in the ``augur_operations.worker_oath`` table for the platforms you use. +CollectOSS's configuration template file, which generates your locally deployed ``augur.config.json`` file, is found at ``collectoss/config.py``. You will notice a small collection of workers are turned on to start with, by examining the ``switch`` variable within the ``Workers`` block of the config file. You can also specify the number of processes to spawn for each worker using the ``workers`` command. The default is one, and we recommend you start here. If you are going to spawn multiple workers, be sure you have enough credentials cached in the ``collection_operations.worker_oauth`` table for the platforms you use. If you have questions or would like to help please open an issue on GitHub_. diff --git a/docs/source/development-guide/tech-breakdown.rst b/docs/source/development-guide/tech-breakdown.rst index ce4425877..0e002bcfa 100644 --- a/docs/source/development-guide/tech-breakdown.rst +++ b/docs/source/development-guide/tech-breakdown.rst @@ -127,7 +127,7 @@ Your CollectOSS instance will now be available at http://servername-or-ip:port_number Note: CollectOSS will run on port 5000 by default (you probably need to -change that in augur_operations.config for OSX) +change that in collection_operations.config for OSX) Stopping your CollectOSS Instance --------------------------------- diff --git a/keyman/README.md b/keyman/README.md index 1deb1b8b9..18622c914 100644 --- a/keyman/README.md +++ b/keyman/README.md @@ -119,7 +119,7 @@ python keyman/Orchestrator.py ## Adding Keys ```sql -INSERT INTO augur_operations.worker_oauth +INSERT INTO collection_operations.worker_oauth (name, consumer_key, consumer_secret, access_token, access_token_secret, platform) VALUES ('My GitHub Key', 'not_used', 'not_used', 'ghp_YOURTOKEN', 'not_used', 'github_rest'); diff
--git a/tests/test_application/test_config/test_config.py b/tests/test_application/test_config/test_config.py index b03db89c6..8341b12da 100644 --- a/tests/test_application/test_config/test_config.py +++ b/tests/test_application/test_config/test_config.py @@ -26,7 +26,7 @@ def test_config_get_value(test_db_config, test_db_engine): finally: with test_db_engine.connect() as connection: - connection.execute("""DELETE FROM augur_operations.config""") + connection.execute("""DELETE FROM collection_operations.config""") def test_config_get_section(test_db_config, test_db_engine): @@ -62,7 +62,7 @@ def test_config_get_section(test_db_config, test_db_engine): finally: with test_db_engine.connect() as connection: - connection.execute("""DELETE FROM augur_operations.config""") + connection.execute("""DELETE FROM collection_operations.config""") def test_config_load_config(test_db_config, test_db_engine): @@ -102,7 +102,7 @@ def test_config_load_config(test_db_config, test_db_engine): finally: with test_db_engine.connect() as connection: - connection.execute("""DELETE FROM augur_operations.config""") + connection.execute("""DELETE FROM collection_operations.config""") def test_config_empty(test_db_config, test_db_engine): @@ -132,7 +132,7 @@ def test_config_empty(test_db_config, test_db_engine): finally: with test_db_engine.connect() as connection: - connection.execute("""DELETE FROM augur_operations.config""") + connection.execute("""DELETE FROM collection_operations.config""") def test_config_is_section_in_config(test_db_config, test_db_engine): @@ -163,7 +163,7 @@ def test_config_is_section_in_config(test_db_config, test_db_engine): finally: with test_db_engine.connect() as connection: - connection.execute("""DELETE FROM augur_operations.config""") + connection.execute("""DELETE FROM collection_operations.config""") def test_config_add_settings(test_db_config, test_db_engine): @@ -174,7 +174,7 @@ def test_config_add_settings(test_db_config, test_db_engine): with 
test_db_engine.connect() as connection: - result = connection.execute("""SELECT * FROM augur_operations.config""").fetchall() + result = connection.execute("""SELECT * FROM collection_operations.config""").fetchall() assert result is not None assert len(result) == 2 @@ -189,7 +189,7 @@ def test_config_add_settings(test_db_config, test_db_engine): finally: with test_db_engine.connect() as connection: - connection.execute("""DELETE FROM augur_operations.config""") + connection.execute("""DELETE FROM collection_operations.config""") def test_config_update_settings(test_db_config, test_db_engine): @@ -222,7 +222,7 @@ def test_config_update_settings(test_db_config, test_db_engine): with test_db_engine.connect() as connection: - result = connection.execute("""SELECT * FROM augur_operations.config""").fetchall() + result = connection.execute("""SELECT * FROM collection_operations.config""").fetchall() assert len(result) == 3 @@ -235,7 +235,7 @@ def test_config_update_settings(test_db_config, test_db_engine): finally: with test_db_engine.connect() as connection: - connection.execute("""DELETE FROM augur_operations.config""") + connection.execute("""DELETE FROM collection_operations.config""") def test_config_add_section_from_json(test_db_config, test_db_engine): @@ -252,7 +252,7 @@ def test_config_add_section_from_json(test_db_config, test_db_engine): with test_db_engine.connect() as connection: - result = connection.execute("""SELECT * FROM augur_operations.config""") + result = connection.execute("""SELECT * FROM collection_operations.config""") for row in result: dict_data = dict(row) @@ -266,7 +266,7 @@ def test_config_add_section_from_json(test_db_config, test_db_engine): finally: with test_db_engine.connect() as connection: - connection.execute("""DELETE FROM augur_operations.config""") + connection.execute("""DELETE FROM collection_operations.config""") def test_load_config_file(test_db_config): @@ -312,7 +312,7 @@ def 
test_config_load_config_from_dict(test_db_config, test_db_engine): with test_db_engine.connect() as connection: - result = connection.execute("""SELECT * FROM augur_operations.config""").fetchall() + result = connection.execute("""SELECT * FROM collection_operations.config""").fetchall() for row in result: dict_data = dict(row) @@ -328,7 +328,7 @@ def test_config_load_config_from_dict(test_db_config, test_db_engine): finally: with test_db_engine.connect() as connection: - connection.execute("""DELETE FROM augur_operations.config""") + connection.execute("""DELETE FROM collection_operations.config""") def test_config_clear(test_db_config, test_db_engine): @@ -350,14 +350,14 @@ def test_config_clear(test_db_config, test_db_engine): with test_db_engine.connect() as connection: - result = connection.execute("""SELECT * FROM augur_operations.config""").fetchall() + result = connection.execute("""SELECT * FROM collection_operations.config""").fetchall() assert len(result) == 0 finally: with test_db_engine.connect() as connection: - connection.execute("""DELETE FROM augur_operations.config""") + connection.execute("""DELETE FROM collection_operations.config""") def test_remove_section(test_db_config, test_db_engine): @@ -385,7 +385,7 @@ def test_remove_section(test_db_config, test_db_engine): with test_db_engine.connect() as connection: - result = connection.execute("""SELECT * FROM augur_operations.config""").fetchall() + result = connection.execute("""SELECT * FROM collection_operations.config""").fetchall() for row in result: dict_data = dict(row) @@ -395,7 +395,7 @@ def test_remove_section(test_db_config, test_db_engine): finally: with test_db_engine.connect() as connection: - connection.execute("""DELETE FROM augur_operations.config""") + connection.execute("""DELETE FROM collection_operations.config""") diff --git a/tests/test_tasks/test_task_utilities/test_key_handler/test_github_api_key_handler.py 
b/tests/test_tasks/test_task_utilities/test_key_handler/test_github_api_key_handler.py index edf5ac3cf..98008a8a3 100644 --- a/tests/test_tasks/test_task_utilities/test_key_handler/test_github_api_key_handler.py +++ b/tests/test_tasks/test_task_utilities/test_key_handler/test_github_api_key_handler.py @@ -43,7 +43,7 @@ def test_get_config_key(key_handler, test_db_engine): finally: with test_db_engine.connect() as connection: - connection.execute("""DELETE FROM augur_operations.config""") + connection.execute("""DELETE FROM collection_operations.config""") def test_get_config_key_with_none_specified(key_handler, test_db_engine): @@ -78,7 +78,7 @@ def test_get_api_keys_from_database(key_handler, test_db_engine): finally: with test_db_engine.connect() as connection: - connection.execute("""DELETE FROM augur_operations.worker_oauth""") + connection.execute("""DELETE FROM collection_operations.worker_oauth""") api_key_list = ["asdfdfkey", "jloire", "zdfdr", "asdrxer"] @pytest.mark.parametrize("api_key", api_key_list) @@ -112,4 +112,4 @@ def test_get_api_keys(key_handler, test_db_engine): finally: with test_db_engine.connect() as connection: - connection.execute("""DELETE FROM augur_operations.worker_oauth""") \ No newline at end of file + connection.execute("""DELETE FROM collection_operations.worker_oauth""") \ No newline at end of file From 2c11dad249ef8935b586d76097b79a728e180ac4 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 27 May 2026 15:51:03 -0400 Subject: [PATCH 11/26] create database migration for the schema rename Signed-off-by: Adrian Edwards --- .../alembic/versions/43_rename_schema.py | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 collectoss/application/schema/alembic/versions/43_rename_schema.py diff --git a/collectoss/application/schema/alembic/versions/43_rename_schema.py b/collectoss/application/schema/alembic/versions/43_rename_schema.py new file mode 100644 index 000000000..c5b1a9acd --- /dev/null +++ 
b/collectoss/application/schema/alembic/versions/43_rename_schema.py @@ -0,0 +1,30 @@ +"""rename schema + +Revision ID: 43 +Revises: 42 +Create Date: 2026-05-27 15:28:12.439500 + +""" +from alembic import op +import sqlalchemy as sa +from sqlalchemy import text +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision = '43' +down_revision = '42' +branch_labels = None +depends_on = None + + +def upgrade() -> None: + conn = op.get_bind() + conn.execute(text("ALTER SCHEMA augur_data RENAME TO collection_data;")) + conn.execute(text("ALTER SCHEMA augur_operations RENAME TO collection_operations;")) + + + +def downgrade() -> None: + conn = op.get_bind() + conn.execute(text("ALTER SCHEMA collection_data RENAME TO augur_data;")) + conn.execute(text("ALTER SCHEMA collection_operations RENAME TO augur_operations;")) \ No newline at end of file From ba45cb1039f3f91574857017e8d7313c24910c94 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 27 May 2026 15:59:08 -0400 Subject: [PATCH 12/26] correct more imports of the ops schema Signed-off-by: Adrian Edwards --- collectoss/api/routes/dei.py | 2 +- collectoss/tasks/frontend.py | 2 +- .../tasks/git/util/facade_worker/facade_worker/repofetch.py | 2 +- collectoss/util/repo_load_controller.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/collectoss/api/routes/dei.py b/collectoss/api/routes/dei.py index 64af957bf..5e18dceb8 100644 --- a/collectoss/api/routes/dei.py +++ b/collectoss/api/routes/dei.py @@ -21,7 +21,7 @@ logger = logging.getLogger(__name__) from collectoss.api.routes import API_VERSION -from collectoss.application.db.models.augur_operations import FRONTEND_REPO_GROUP_NAME +from collectoss.application.db.models.operations import FRONTEND_REPO_GROUP_NAME @app.route(f"/{API_VERSION}/dei/repo/add", methods=['POST']) @ssl_required diff --git a/collectoss/tasks/frontend.py b/collectoss/tasks/frontend.py index 4ed2e24aa..d78fc1e1d 100644 --- 
a/collectoss/tasks/frontend.py +++ b/collectoss/tasks/frontend.py @@ -10,7 +10,7 @@ from collectoss.tasks.github.util.github_graphql_data_access import GithubGraphQlDataAccess from collectoss.application.db.lib import get_group_by_name, get_repo_by_repo_git, get_github_repo_by_src_id, get_gitlab_repo_by_src_id from collectoss.tasks.github.util.util import get_owner_repo -from collectoss.application.db.models.augur_operations import retrieve_owner_repos, FRONTEND_REPO_GROUP_NAME, RepoGroup, CollectionStatus +from collectoss.application.db.models.operations import retrieve_owner_repos, FRONTEND_REPO_GROUP_NAME, RepoGroup, CollectionStatus from collectoss.tasks.github.util.github_paginator import hit_api from collectoss.application.db.models import UserRepo, Repo diff --git a/collectoss/tasks/git/util/facade_worker/facade_worker/repofetch.py b/collectoss/tasks/git/util/facade_worker/facade_worker/repofetch.py index 3f7ab07e9..968c4c54d 100644 --- a/collectoss/tasks/git/util/facade_worker/facade_worker/repofetch.py +++ b/collectoss/tasks/git/util/facade_worker/facade_worker/repofetch.py @@ -33,7 +33,7 @@ from .utilitymethods import update_repo_log, get_absolute_repo_path from sqlalchemy.orm.exc import NoResultFound from collectoss.application.db.models.augur_data import * -from collectoss.application.db.models.augur_operations import CollectionStatus +from collectoss.application.db.models.operations import CollectionStatus from collectoss.application.db.util import execute_session_query, convert_orm_list_to_dict_list from collectoss.application.db.lib import execute_sql, get_repo_by_repo_git from typing_extensions import deprecated diff --git a/collectoss/util/repo_load_controller.py b/collectoss/util/repo_load_controller.py index 7e3a0548a..2fde93bb0 100644 --- a/collectoss/util/repo_load_controller.py +++ b/collectoss/util/repo_load_controller.py @@ -6,7 +6,7 @@ from collectoss.application.db.engine import DatabaseEngine from collectoss.application.db.models import 
Repo, UserRepo, RepoGroup, UserGroup, User, CollectionStatus -from collectoss.application.db.models.augur_operations import retrieve_owner_repos +from collectoss.application.db.models.operations import retrieve_owner_repos from collectoss.application.db.util import execute_session_query from sqlalchemy import Column, Table, MetaData, or_ From 419d3df52216997d45eb448834b66f3e7850e67f Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 27 May 2026 15:59:36 -0400 Subject: [PATCH 13/26] fix search paths Signed-off-by: Adrian Edwards --- collectoss/application/db/engine.py | 2 +- tests/test_helpers.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/collectoss/application/db/engine.py b/collectoss/application/db/engine.py index ef582dbed..e2ba3902e 100644 --- a/collectoss/application/db/engine.py +++ b/collectoss/application/db/engine.py @@ -105,7 +105,7 @@ def set_search_path(dbapi_connection, connection_record): existing_autocommit = dbapi_connection.autocommit dbapi_connection.autocommit = True cursor = dbapi_connection.cursor() - cursor.execute("SET SESSION search_path=public,augur_data,augur_operations,spdx") + cursor.execute("SET SESSION search_path=public,collection_data,collection_operations,spdx") cursor.close() dbapi_connection.autocommit = existing_autocommit diff --git a/tests/test_helpers.py b/tests/test_helpers.py index a0401f369..dd850a23f 100644 --- a/tests/test_helpers.py +++ b/tests/test_helpers.py @@ -14,7 +14,7 @@ def set_search_path(dbapi_connection, connection_record): existing_autocommit = dbapi_connection.autocommit dbapi_connection.autocommit = True cursor = dbapi_connection.cursor() - cursor.execute("SET SESSION search_path=public,augur_data,augur_operations,spdx") + cursor.execute("SET SESSION search_path=public,collection_data,collection_operations,spdx") cursor.close() dbapi_connection.autocommit = existing_autocommit From f275c2bf39f71c60bc81c3fa80f43239f45eb9c1 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date:
Wed, 27 May 2026 16:00:29 -0400 Subject: [PATCH 14/26] update test fixtures for worker tests Signed-off-by: Adrian Edwards --- tests/test_workers/test_set_up_fixtures.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_workers/test_set_up_fixtures.py b/tests/test_workers/test_set_up_fixtures.py index 8d3ad70f1..fa4496cd8 100644 --- a/tests/test_workers/test_set_up_fixtures.py +++ b/tests/test_workers/test_set_up_fixtures.py @@ -153,8 +153,8 @@ def initialize_database_connections(self): "augur", "augur", "172.17.0.1", 5400, "test" ) - self.db_schema = 'augur_data' - self.helper_schema = 'augur_operations' + self.db_schema = 'collection_data' + self.helper_schema = 'collection_operations' self.helper_db = s.create_engine(DB_STR, poolclass=s.pool.NullPool, connect_args={'options': '-csearch_path={}'.format(self.helper_schema)}) From e410cfcc66679c4324a4ed30a5ad1661eb2de087 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 27 May 2026 16:00:47 -0400 Subject: [PATCH 15/26] update unit test references to operations schema in SQL Signed-off-by: Adrian Edwards --- .../test_config/test_config.py | 16 ++++++++-------- .../test_augur_operations/test_user_group.py | 10 +++++----- .../test_augur_operations/test_user_repo.py | 6 +++--- .../test_repo_load_controller/helper.py | 18 +++++++++--------- .../test_repo_load_controller/util.py | 16 ++++++++-------- .../test_github_api_key_handler.py | 6 +++--- 6 files changed, 36 insertions(+), 36 deletions(-) diff --git a/tests/test_application/test_config/test_config.py b/tests/test_application/test_config/test_config.py index 8341b12da..4ed62d9c5 100644 --- a/tests/test_application/test_config/test_config.py +++ b/tests/test_application/test_config/test_config.py @@ -15,7 +15,7 @@ def test_config_get_value(test_db_config, test_db_engine): with test_db_engine.connect() as connection: - query = text("""INSERT INTO "augur_operations"."config" ("section_name", "setting_name", "value", "type") VALUES 
(:section_name, :setting_name, :value, 'str');""") + query = text("""INSERT INTO "collection_operations"."config" ("section_name", "setting_name", "value", "type") VALUES (:section_name, :setting_name, :value, 'str');""") connection.execute(query, **data) @@ -43,7 +43,7 @@ def test_config_get_section(test_db_config, test_db_engine): for data in network_data: - query = text("""INSERT INTO "augur_operations"."config" ("section_name", "setting_name", "value", "type") VALUES (:section_name, :setting_name, :value, 'str');""") + query = text("""INSERT INTO "collection_operations"."config" ("section_name", "setting_name", "value", "type") VALUES (:section_name, :setting_name, :value, 'str');""") connection.execute(query, **data) @@ -82,7 +82,7 @@ def test_config_load_config(test_db_config, test_db_engine): for data in all_data: - query = text("""INSERT INTO "augur_operations"."config" ("section_name", "setting_name", "value", "type") VALUES (:section_name, :setting_name, :value, 'str');""") + query = text("""INSERT INTO "collection_operations"."config" ("section_name", "setting_name", "value", "type") VALUES (:section_name, :setting_name, :value, 'str');""") connection.execute(query, **data) @@ -123,7 +123,7 @@ def test_config_empty(test_db_config, test_db_engine): for data in all_data: - query = text("""INSERT INTO "augur_operations"."config" ("section_name", "setting_name", "value", "type") VALUES (:section_name, :setting_name, :value, 'str');""") + query = text("""INSERT INTO "collection_operations"."config" ("section_name", "setting_name", "value", "type") VALUES (:section_name, :setting_name, :value, 'str');""") connection.execute(query, **data) @@ -151,7 +151,7 @@ def test_config_is_section_in_config(test_db_config, test_db_engine): for data in all_data: - query = text("""INSERT INTO "augur_operations"."config" ("section_name", "setting_name", "value", "type") VALUES (:section_name, :setting_name, :value, 'str');""") + query = text("""INSERT INTO 
"collection_operations"."config" ("section_name", "setting_name", "value", "type") VALUES (:section_name, :setting_name, :value, 'str');""") connection.execute(query, **data) @@ -212,7 +212,7 @@ def test_config_update_settings(test_db_config, test_db_engine): for data in all_data: - query = text("""INSERT INTO "augur_operations"."config" ("section_name", "setting_name", "value", "type") VALUES (:section_name, :setting_name, :value, 'str');""") + query = text("""INSERT INTO "collection_operations"."config" ("section_name", "setting_name", "value", "type") VALUES (:section_name, :setting_name, :value, 'str');""") connection.execute(query, **data) @@ -342,7 +342,7 @@ def test_config_clear(test_db_config, test_db_engine): for data in all_data: - query = text("""INSERT INTO "augur_operations"."config" ("section_name", "setting_name", "value", "type") VALUES (:section_name, :setting_name, :value, 'str');""") + query = text("""INSERT INTO "collection_operations"."config" ("section_name", "setting_name", "value", "type") VALUES (:section_name, :setting_name, :value, 'str');""") connection.execute(query, **data) @@ -377,7 +377,7 @@ def test_remove_section(test_db_config, test_db_engine): for data in all_data: - query = text("""INSERT INTO "augur_operations"."config" ("section_name", "setting_name", "value", "type") VALUES (:section_name, :setting_name, :value, 'str');""") + query = text("""INSERT INTO "collection_operations"."config" ("section_name", "setting_name", "value", "type") VALUES (:section_name, :setting_name, :value, 'str');""") connection.execute(query, **data) diff --git a/tests/test_application/test_db/test_models/test_augur_operations/test_user_group.py b/tests/test_application/test_db/test_models/test_augur_operations/test_user_group.py index 1eb7c7492..f09582511 100644 --- a/tests/test_application/test_db/test_models/test_augur_operations/test_user_group.py +++ b/tests/test_application/test_db/test_models/test_augur_operations/test_user_group.py @@ -73,19 
+73,19 @@ def test_add_user_group(test_db_engine): with test_db_engine.connect() as connection: - query = s.text("""SELECT * FROM "augur_operations"."user_groups";""") + query = s.text("""SELECT * FROM "collection_operations"."user_groups";""") result = connection.execute(query).fetchall() assert result is not None assert len(result) == 3 - query = s.text("""SELECT * FROM "augur_operations"."user_groups" WHERE "user_id"={};""".format(data["users"][0]["id"])) + query = s.text("""SELECT * FROM "collection_operations"."user_groups" WHERE "user_id"={};""".format(data["users"][0]["id"])) result = connection.execute(query).fetchall() assert result is not None assert len(result) == 2 - query = s.text("""SELECT * FROM "augur_operations"."user_groups" WHERE "user_id"={};""".format(data["users"][1]["id"])) + query = s.text("""SELECT * FROM "collection_operations"."user_groups" WHERE "user_id"={};""".format(data["users"][1]["id"])) result = connection.execute(query).fetchall() assert result is not None @@ -212,7 +212,7 @@ def test_remove_user_group(test_db_engine): with test_db_engine.connect() as connection: - query = s.text("""SELECT * FROM "augur_operations"."user_groups";""") + query = s.text("""SELECT * FROM "collection_operations"."user_groups";""") result = connection.execute(query).fetchall() assert result is not None @@ -226,7 +226,7 @@ def test_remove_user_group(test_db_engine): with test_db_engine.connect() as connection: - query = s.text("""SELECT * FROM "augur_operations"."user_groups";""") + query = s.text("""SELECT * FROM "collection_operations"."user_groups";""") result = connection.execute(query).fetchall() assert result is not None diff --git a/tests/test_application/test_db/test_models/test_augur_operations/test_user_repo.py b/tests/test_application/test_db/test_models/test_augur_operations/test_user_repo.py index ee7abf4c1..58b1488d9 100644 --- a/tests/test_application/test_db/test_models/test_augur_operations/test_user_repo.py +++ 
b/tests/test_application/test_db/test_models/test_augur_operations/test_user_repo.py @@ -74,7 +74,7 @@ def test_add_repo_to_user_group(test_db_engine): with test_db_engine.connect() as connection: - query = s.text("""SELECT * FROM "augur_operations"."user_repos";""") + query = s.text("""SELECT * FROM "collection_operations"."user_repos";""") # WHERE "group_id"=:user_group_id AND "repo_id"=:repo_id result = connection.execute(query).fetchall() @@ -82,14 +82,14 @@ def test_add_repo_to_user_group(test_db_engine): assert len(result) == 4 - query = s.text("""SELECT * FROM "augur_operations"."user_repos" WHERE "group_id"={};""".format(data["user_group_ids"][0])) + query = s.text("""SELECT * FROM "collection_operations"."user_repos" WHERE "group_id"={};""".format(data["user_group_ids"][0])) result = connection.execute(query).fetchall() assert result is not None assert len(result) == 2 - query = s.text("""SELECT * FROM "augur_operations"."user_repos" WHERE "group_id"={};""".format(data["user_group_ids"][0])) + query = s.text("""SELECT * FROM "collection_operations"."user_repos" WHERE "group_id"={};""".format(data["user_group_ids"][0])) result = connection.execute(query).fetchall() assert result is not None diff --git a/tests/test_application/test_repo_load_controller/helper.py b/tests/test_application/test_repo_load_controller/helper.py index 051e48eff..8138783f7 100644 --- a/tests/test_application/test_repo_load_controller/helper.py +++ b/tests/test_application/test_repo_load_controller/helper.py @@ -28,19 +28,19 @@ def get_repo_group_delete_statement(): def get_user_delete_statement(): - return get_delete_statement("augur_operations", "users") + return get_delete_statement("collection_operations", "users") def get_user_repo_delete_statement(): - return get_delete_statement("augur_operations", "user_repos") + return get_delete_statement("collection_operations", "user_repos") def get_user_group_delete_statement(): - return get_delete_statement("augur_operations", 
"user_groups") + return get_delete_statement("collection_operations", "user_groups") def get_config_delete_statement(): - return get_delete_statement("augur_operations", "config") + return get_delete_statement("collection_operations", "config") def get_repo_related_delete_statements(table_list): """Takes a list of tables related to the RepoLoadController class and generates a delete statement. @@ -96,7 +96,7 @@ def get_repo_insert_statement(repo_id, rg_id, repo_url="place holder url"): def get_user_repo_insert_statement(repo_id, group_id): - return """INSERT INTO "augur_operations"."user_repos" ("repo_id", "group_id") VALUES ({}, {});""".format(repo_id, group_id) + return """INSERT INTO "collection_operations"."user_repos" ("repo_id", "group_id") VALUES ({}, {});""".format(repo_id, group_id) def get_repo_group_insert_statement(rg_id): @@ -104,14 +104,14 @@ def get_repo_group_insert_statement(rg_id): def get_user_insert_statement(user_id, username="bil", email="default@gmail.com", password="pass"): - return """INSERT INTO "augur_operations"."users" ("user_id", "login_name", "login_hashword", "email", "first_name", "last_name", "admin") VALUES ({}, '{}', '{}', '{}', 'bill', 'bob', false);""".format(user_id, username, User.compute_hashsed_password(password), email) + return """INSERT INTO "collection_operations"."users" ("user_id", "login_name", "login_hashword", "email", "first_name", "last_name", "admin") VALUES ({}, '{}', '{}', '{}', 'bill', 'bob', false);""".format(user_id, username, User.compute_hashsed_password(password), email) def get_user_group_insert_statement(user_id, group_name, group_id=None): if group_id: - return """INSERT INTO "augur_operations"."user_groups" ("group_id", "user_id", "name") VALUES ({}, {}, '{}');""".format(group_id, user_id, group_name) + return """INSERT INTO "collection_operations"."user_groups" ("group_id", "user_id", "name") VALUES ({}, {}, '{}');""".format(group_id, user_id, group_name) - return """INSERT INTO 
"augur_operations"."user_groups" ("user_id", "name") VALUES ({}, '{}');""".format(user_id, group_name) + return """INSERT INTO "collection_operations"."user_groups" ("user_id", "name") VALUES ({}, '{}');""".format(user_id, group_name) ######## Helper Functions to get retrieve data from tables ################# @@ -135,7 +135,7 @@ def get_repos(connection, where_string=None): def get_user_repos(connection): - return connection.execute(s.text("""SELECT * FROM "augur_operations"."user_repos";""")).fetchall() + return connection.execute(s.text("""SELECT * FROM "collection_operations"."user_repos";""")).fetchall() ######## Helper Functions to get repos in an org ################# diff --git a/tests/test_application/test_repo_load_controller/util.py b/tests/test_application/test_repo_load_controller/util.py index 1283e7580..305d9acf7 100644 --- a/tests/test_application/test_repo_load_controller/util.py +++ b/tests/test_application/test_repo_load_controller/util.py @@ -14,19 +14,19 @@ def get_repo_group_delete_statement(): def get_user_delete_statement(): - return get_delete_statement("augur_operations", "users") + return get_delete_statement("collection_operations", "users") def get_user_repo_delete_statement(): - return get_delete_statement("augur_operations", "user_repos") + return get_delete_statement("collection_operations", "user_repos") def get_user_group_delete_statement(): - return get_delete_statement("augur_operations", "user_groups") + return get_delete_statement("collection_operations", "user_groups") def get_config_delete_statement(): - return get_delete_statement("augur_operations", "config") + return get_delete_statement("collection_operations", "config") def get_repo_related_delete_statements(table_list): """Takes a list of tables related to the RepoLoadController class and generates a delete statement. 
@@ -86,14 +86,14 @@ def get_repo_group_insert_statement(rg_id): def get_user_insert_statement(user_id): - return """INSERT INTO "augur_operations"."users" ("user_id", "login_name", "login_hashword", "email", "first_name", "last_name", "admin") VALUES ({}, 'bil', 'pass', 'b@gmil.com', 'bill', 'bob', false);""".format(user_id) + return """INSERT INTO "collection_operations"."users" ("user_id", "login_name", "login_hashword", "email", "first_name", "last_name", "admin") VALUES ({}, 'bil', 'pass', 'b@gmil.com', 'bill', 'bob', false);""".format(user_id) def get_user_group_insert_statement(user_id, group_name, group_id=None): if group_id: - return """INSERT INTO "augur_operations"."user_groups" ("group_id", "user_id", "name") VALUES ({}, {}, '{}');""".format(group_id, user_id, group_name) + return """INSERT INTO "collection_operations"."user_groups" ("group_id", "user_id", "name") VALUES ({}, {}, '{}');""".format(group_id, user_id, group_name) - return """INSERT INTO "augur_operations"."user_groups" (user_id", "name") VALUES (1, 'default');""".format(user_id, group_name) + return """INSERT INTO "collection_operations"."user_groups" (user_id", "name") VALUES (1, 'default');""".format(user_id, group_name) ######## Helper Functions to get retrieve data from tables ################# @@ -117,7 +117,7 @@ def get_repos(connection, where_string=None): def get_user_repos(connection): - return connection.execute(s.text("""SELECT * FROM "augur_operations"."user_repos";""")).fetchall() + return connection.execute(s.text("""SELECT * FROM "collection_operations"."user_repos";""")).fetchall() ######## Helper Functions to get repos in an org ################# diff --git a/tests/test_tasks/test_task_utilities/test_key_handler/test_github_api_key_handler.py b/tests/test_tasks/test_task_utilities/test_key_handler/test_github_api_key_handler.py index 98008a8a3..54849ebbc 100644 --- a/tests/test_tasks/test_task_utilities/test_key_handler/test_github_api_key_handler.py +++ 
b/tests/test_tasks/test_task_utilities/test_key_handler/test_github_api_key_handler.py @@ -33,7 +33,7 @@ def test_get_config_key(key_handler, test_db_engine): data = {"github_api_key": "asdfdfkey"} with test_db_engine.connect() as connection: - query = text("""INSERT INTO "augur_operations"."config" ("id", "section_name", "setting_name", "value", "type") VALUES (3, 'Keys', 'github_api_key', :github_api_key, 'str');""") + query = text("""INSERT INTO "collection_operations"."config" ("id", "section_name", "setting_name", "value", "type") VALUES (3, 'Keys', 'github_api_key', :github_api_key, 'str');""") connection.execute(query, **data) @@ -64,7 +64,7 @@ def test_get_api_keys_from_database(key_handler, test_db_engine): for value in data: - query = text("""INSERT INTO "augur_operations"."worker_oauth" ("name", "consumer_key", "consumer_secret", "access_token", "access_token_secret", "repo_directory", "platform") VALUES ('test_key', '0', '0', :api_key, '0', NULL, 'github');""") + query = text("""INSERT INTO "collection_operations"."worker_oauth" ("name", "consumer_key", "consumer_secret", "access_token", "access_token_secret", "repo_directory", "platform") VALUES ('test_key', '0', '0', :api_key, '0', NULL, 'github');""") connection.execute(query, **value) @@ -101,7 +101,7 @@ def test_get_api_keys(key_handler, test_db_engine): for value in data: - query = text("""INSERT INTO "augur_operations"."worker_oauth" ("name", "consumer_key", "consumer_secret", "access_token", "access_token_secret", "repo_directory", "platform") VALUES ('test_key', '0', '0', :api_key, '0', NULL, 'github');""") + query = text("""INSERT INTO "collection_operations"."worker_oauth" ("name", "consumer_key", "consumer_secret", "access_token", "access_token_secret", "repo_directory", "platform") VALUES ('test_key', '0', '0', :api_key, '0', NULL, 'github');""") connection.execute(query, **value) From 68d9d70b8378542f1ee0454d891813f81b9a32bb Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 27 May 
2026 16:01:05 -0400 Subject: [PATCH 16/26] update docs references to ops schema by name Signed-off-by: Adrian Edwards --- docs/source/getting-started/collecting-data.rst | 2 +- docs/source/getting-started/command-line-interface/db.rst | 4 ++-- docs/source/schema/overview.rst | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/source/getting-started/collecting-data.rst b/docs/source/getting-started/collecting-data.rst index 78b421f0f..5e7297ae3 100644 --- a/docs/source/getting-started/collecting-data.rst +++ b/docs/source/getting-started/collecting-data.rst @@ -60,7 +60,7 @@ There are many collection jobs that ship ready to collect out of the box: - ``collectoss.tasks.github.releases.tasks`` (collects release data from the GitHub API) - ``collectoss.tasks.data_analysis.insight_worker.tasks`` (queries CollectOSS's metrics API to find interesting anomalies in the collected data) -All worker configuration options are found in the config table generated when collectoss was installed. The config table is located in the augur_operations schema of your postgresql database. Each configurable data collection job set has its subsection with the same or similar title as the task's name. We recommend leaving the defaults and only changing them when explicitly necessary, as the default parameters will work for most use cases. Read on for more on how to make sure your workers are properly configured. +All worker configuration options are found in the config table generated when collectoss was installed. The config table is located in the collection_operations schema of your postgresql database. Each configurable data collection job set has its subsection with the same or similar title as the task's name. We recommend leaving the defaults and only changing them when explicitly necessary, as the default parameters will work for most use cases. Read on for more on how to make sure your workers are properly configured. 
Worker-specific configuration options ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/docs/source/getting-started/command-line-interface/db.rst b/docs/source/getting-started/command-line-interface/db.rst index 853cd2a66..9b8079f11 100644 --- a/docs/source/getting-started/command-line-interface/db.rst +++ b/docs/source/getting-started/command-line-interface/db.rst @@ -175,7 +175,7 @@ Example usage\: > ADD COLUMN "repo_archived_date_collected" timestamptz(0), > ALTER COLUMN "forked_from" TYPE varchar USING "forked_from"::varchar; > ALTER TABLE - > update "augur_operations"."augur_settings" set value = 17 where setting = 'augur_data_version'; + > update "collection_operations"."augur_settings" set value = 17 where setting = 'augur_data_version'; > UPDATE 1 > CLI: [db.upgrade_db_version] [INFO] Upgrading from 17 to 18 > etc... @@ -193,4 +193,4 @@ Example usage\: $ uv run collectoss db create-schema .. note:: - If this runs successfully, you should see a bunch of schema creation commands fly by pretty fast. If everything worked you should see: ``update "augur_operations"."augur_settings" set value = xx where setting = 'augur_data_version';`` at the end. + If this runs successfully, you should see a bunch of schema creation commands fly by pretty fast. If everything worked you should see: ``update "collection_operations"."augur_settings" set value = xx where setting = 'augur_data_version';`` at the end. diff --git a/docs/source/schema/overview.rst b/docs/source/schema/overview.rst index 1322cce8d..d065b64df 100644 --- a/docs/source/schema/overview.rst +++ b/docs/source/schema/overview.rst @@ -61,7 +61,7 @@ gathered from commits, issues, and other info. CollectOSS Operations ------------------------------------------------------- -The ``augur_operations`` tables are where most of the operations tables +The ``collection_operations`` tables are where most of the operations tables exist. 
There are a few, like ``settings`` that remain in ``augur_data`` for now, but will be moved. They keep records related to analytical history and data provenance for data in the schema. They also From e5320069cc71cfbf396cd0bf21388482f1bce893 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 27 May 2026 16:03:17 -0400 Subject: [PATCH 17/26] update database comments (and include them in the schema rename migration) Signed-off-by: Adrian Edwards --- .../application/db/models/operations.py | 6 +-- .../alembic/versions/43_rename_schema.py | 39 +++++++++++++++++++ 2 files changed, 42 insertions(+), 3 deletions(-) diff --git a/collectoss/application/db/models/operations.py b/collectoss/application/db/models/operations.py index 417230a50..4a05ed802 100644 --- a/collectoss/application/db/models/operations.py +++ b/collectoss/application/db/models/operations.py @@ -120,7 +120,7 @@ class Settings(Base): ), Index("repos_id,statusops", "repos_id", "status"), schema="collection_operations", - comment="For future use when we move all working tables to the augur_operations schema. ", + comment="For future use when we move all working tables to the collection_operations schema. ", ) class WorkerHistory(Base): @@ -197,7 +197,7 @@ class WorkerSettingsFacade(Base): __tablename__ = "worker_settings_facade" __table_args__ = { "schema": "collection_operations", - "comment": "For future use when we move all working tables to the augur_operations schema. ", + "comment": "For future use when we move all working tables to the collection_operations schema. ", } id = Column(Integer, primary_key=True) @@ -216,7 +216,7 @@ class WorkerSettingsFacade(Base): "working_commit", String(40), server_default=text("'NULL'::character varying") ), schema="collection_operations", - comment="For future use when we move all working tables to the augur_operations schema. ", + comment="For future use when we move all working tables to the collection_operations schema. 
", ) class BadgingDEI(Base): diff --git a/collectoss/application/schema/alembic/versions/43_rename_schema.py b/collectoss/application/schema/alembic/versions/43_rename_schema.py index c5b1a9acd..b860cd6e4 100644 --- a/collectoss/application/schema/alembic/versions/43_rename_schema.py +++ b/collectoss/application/schema/alembic/versions/43_rename_schema.py @@ -22,9 +22,48 @@ def upgrade() -> None: conn.execute(text("ALTER SCHEMA augur_data RENAME TO collection_data;")) conn.execute(text("ALTER SCHEMA augur_operations RENAME TO collection_operations;")) + op.create_table_comment( + 'repos_fetch_log', + 'For future use when we move all working tables to the collection_operations schema. ', + existing_comment='For future use when we move all working tables to the augur_operations schema. ', + schema='collection_operations' + ) + op.create_table_comment( + 'worker_settings_facade', + 'For future use when we move all working tables to the collection_operations schema. ', + existing_comment='For future use when we move all working tables to the augur_operations schema. ', + schema='collection_operations' + ) + op.create_table_comment( + 'working_commits', + 'For future use when we move all working tables to the collection_operations schema. ', + existing_comment='For future use when we move all working tables to the augur_operations schema. ', + schema='collection_operations' + ) + def downgrade() -> None: + + op.create_table_comment( + 'working_commits', + 'For future use when we move all working tables to the augur_operations schema. ', + existing_comment='For future use when we move all working tables to the collection_operations schema. ', + schema='collection_operations' + ) + op.create_table_comment( + 'worker_settings_facade', + 'For future use when we move all working tables to the augur_operations schema. ', + existing_comment='For future use when we move all working tables to the collection_operations schema. 
', + schema='collection_operations' + ) + op.create_table_comment( + 'repos_fetch_log', + 'For future use when we move all working tables to the augur_operations schema. ', + existing_comment='For future use when we move all working tables to the collection_operations schema. ', + schema='collection_operations' + ) + conn = op.get_bind() conn.execute(text("ALTER SCHEMA collection_data RENAME TO augur_data;")) conn.execute(text("ALTER SCHEMA collection_operations RENAME TO augur_operations;")) \ No newline at end of file From cd47c36ddcf42ea977e303187e71b92c349b1fed Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 27 May 2026 16:03:38 -0400 Subject: [PATCH 18/26] update another schema reference in tests Signed-off-by: Adrian Edwards --- conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conftest.py b/conftest.py index 38f7e7055..61e84e2a4 100644 --- a/conftest.py +++ b/conftest.py @@ -195,7 +195,7 @@ def read_only_db(empty_db): database_name = empty_db.url.database test_username = "testuser" test_password = "testpass" - schemas = ["public", "collection_data", "augur_operations"] + schemas = ["public", "collection_data", "collection_operations"] # create read-only user empty_db.execute(s.text(f"CREATE USER testuser WITH PASSWORD '{test_password}';")) From 63e768da1cd15f0c5d58bc9e32cc455af415211c Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 27 May 2026 16:16:58 -0400 Subject: [PATCH 19/26] forgot to change augur_data model imports Signed-off-by: Adrian Edwards --- .../tasks/git/util/facade_worker/facade_worker/repofetch.py | 2 +- collectoss/tasks/github/repo_info/core.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/collectoss/tasks/git/util/facade_worker/facade_worker/repofetch.py b/collectoss/tasks/git/util/facade_worker/facade_worker/repofetch.py index 968c4c54d..dfb331c1d 100644 --- a/collectoss/tasks/git/util/facade_worker/facade_worker/repofetch.py +++ 
b/collectoss/tasks/git/util/facade_worker/facade_worker/repofetch.py @@ -32,7 +32,7 @@ import sqlalchemy as s from .utilitymethods import update_repo_log, get_absolute_repo_path from sqlalchemy.orm.exc import NoResultFound -from collectoss.application.db.models.augur_data import * +from collectoss.application.db.models.data import * from collectoss.application.db.models.operations import CollectionStatus from collectoss.application.db.util import execute_session_query, convert_orm_list_to_dict_list from collectoss.application.db.lib import execute_sql, get_repo_by_repo_git diff --git a/collectoss/tasks/github/repo_info/core.py b/collectoss/tasks/github/repo_info/core.py index 25b1b25d1..582a5ed45 100644 --- a/collectoss/tasks/github/repo_info/core.py +++ b/collectoss/tasks/github/repo_info/core.py @@ -9,7 +9,7 @@ from collectoss.application.db.models import * from collectoss.application.db.lib import execute_sql from collectoss.tasks.github.util.github_task_session import * -from collectoss.application.db.models.augur_data import RepoBadging +from collectoss.application.db.models.data import RepoBadging from urllib.parse import quote def query_committers_count(key_auth, logger, owner, repo): From ca62e363f232b245753ebc55e6dc387c7d668b90 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 27 May 2026 17:17:26 -0400 Subject: [PATCH 20/26] rename a schema reflection ref that was crashing gunicorn Signed-off-by: Adrian Edwards --- collectoss/util/repo_load_controller.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/collectoss/util/repo_load_controller.py b/collectoss/util/repo_load_controller.py index 2fde93bb0..5dbef272a 100644 --- a/collectoss/util/repo_load_controller.py +++ b/collectoss/util/repo_load_controller.py @@ -19,7 +19,7 @@ with DatabaseEngine() as engine: - augur_data_schema = MetaData(schema = "augur_data") + augur_data_schema = MetaData(schema = "collection_data") augur_data_schema.reflect(bind = engine, views = True) 
commits_materialized_view: Table = augur_data_schema.tables["collection_data.api_get_all_repos_commits"] From 89f623a6824c353e3c354184727f1abd76092929 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 27 May 2026 17:17:55 -0400 Subject: [PATCH 21/26] rename an augur_data variable Signed-off-by: Adrian Edwards --- collectoss/util/repo_load_controller.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/collectoss/util/repo_load_controller.py b/collectoss/util/repo_load_controller.py index 5dbef272a..1f05c2bca 100644 --- a/collectoss/util/repo_load_controller.py +++ b/collectoss/util/repo_load_controller.py @@ -19,11 +19,11 @@ with DatabaseEngine() as engine: - augur_data_schema = MetaData(schema = "collection_data") - augur_data_schema.reflect(bind = engine, views = True) + data_schema = MetaData(schema = "collection_data") + data_schema.reflect(bind = engine, views = True) - commits_materialized_view: Table = augur_data_schema.tables["collection_data.api_get_all_repos_commits"] - issues_materialized_view: Table = augur_data_schema.tables["collection_data.api_get_all_repos_issues"] + commits_materialized_view: Table = data_schema.tables["collection_data.api_get_all_repos_commits"] + issues_materialized_view: Table = data_schema.tables["collection_data.api_get_all_repos_issues"] class RepoLoadController: From 5ca88b8e84d5c7d3fb48cefd6c93f3925b9bb888 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 27 May 2026 17:19:39 -0400 Subject: [PATCH 22/26] change augur_data references in test files Signed-off-by: Adrian Edwards --- tests/test_application/test_db/test_session.py | 18 +++++++++--------- .../test_repo_load_controller/helper.py | 10 +++++----- .../test_repo_load_controller/util.py | 10 +++++----- .../test_github_tasks/test_pull_requests.py | 14 +++++++------- .../test_endpoints.py | 2 +- tests/test_workers/test_set_up_fixtures.py | 2 +- 6 files changed, 28 insertions(+), 28 deletions(-) diff --git 
a/tests/test_application/test_db/test_session.py b/tests/test_application/test_db/test_session.py index 856a3f194..3c661136d 100644 --- a/tests/test_application/test_db/test_session.py +++ b/tests/test_application/test_db/test_session.py @@ -26,7 +26,7 @@ def test_execute_sql(test_db_engine): for data in all_data: - statement = s.sql.text("""INSERT INTO "augur_data"."contributors" ("cntrb_login", "cntrb_email", "cntrb_full_name", "cntrb_company", "cntrb_created_at", "cntrb_type", "cntrb_fake", "cntrb_deleted", "cntrb_long", "cntrb_lat", "cntrb_country_code", "cntrb_state", "cntrb_city", "cntrb_location", "cntrb_canonical", "cntrb_last_used", "gh_user_id", "gh_login", "gh_url", "gh_html_url", "gh_node_id", "gh_avatar_url", "gh_gravatar_id", "gh_followers_url", "gh_following_url", "gh_gists_url", "gh_starred_url", "gh_subscriptions_url", "gh_organizations_url", "gh_repos_url", "gh_events_url", "gh_received_events_url", "gh_type", "gh_site_admin", "gl_web_url", "gl_avatar_url", "gl_state", "gl_username", "gl_full_name", "gl_id", "tool_source", "tool_version", "data_source", "data_collection_date", "cntrb_id") VALUES (:cntrb_login, NULL, NULL, NULL, NULL, NULL, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, :gh_user_id, :gh_login, 'https://api.github.com/users/ivanayov', 'https://github.com/ivanayov', 'MDQ6VXNlcjQxNjAxMzM=', 'https://avatars.githubusercontent.com/u/4160133?v=4', '', 'https://api.github.com/users/ivanayov/followers', 'https://api.github.com/users/ivanayov/following{/other_user}', 'https://api.github.com/users/ivanayov/gists{/gist_id}', 'https://api.github.com/users/ivanayov/starred{/owner}{/repo}', 'https://api.github.com/users/ivanayov/subscriptions', 'https://api.github.com/users/ivanayov/orgs', 'https://api.github.com/users/ivanayov/repos', 'https://api.github.com/users/ivanayov/events{/privacy}', 'https://api.github.com/users/ivanayov/received_events', 'User', 'false', NULL, NULL, NULL, NULL, NULL, NULL, 'Pr Task', '2.0', 'Github API', 
'2022-08-05 09:06:39', :cntrb_id);""") + statement = s.sql.text("""INSERT INTO "collection_data"."contributors" ("cntrb_login", "cntrb_email", "cntrb_full_name", "cntrb_company", "cntrb_created_at", "cntrb_type", "cntrb_fake", "cntrb_deleted", "cntrb_long", "cntrb_lat", "cntrb_country_code", "cntrb_state", "cntrb_city", "cntrb_location", "cntrb_canonical", "cntrb_last_used", "gh_user_id", "gh_login", "gh_url", "gh_html_url", "gh_node_id", "gh_avatar_url", "gh_gravatar_id", "gh_followers_url", "gh_following_url", "gh_gists_url", "gh_starred_url", "gh_subscriptions_url", "gh_organizations_url", "gh_repos_url", "gh_events_url", "gh_received_events_url", "gh_type", "gh_site_admin", "gl_web_url", "gl_avatar_url", "gl_state", "gl_username", "gl_full_name", "gl_id", "tool_source", "tool_version", "data_source", "data_collection_date", "cntrb_id") VALUES (:cntrb_login, NULL, NULL, NULL, NULL, NULL, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, :gh_user_id, :gh_login, 'https://api.github.com/users/ivanayov', 'https://github.com/ivanayov', 'MDQ6VXNlcjQxNjAxMzM=', 'https://avatars.githubusercontent.com/u/4160133?v=4', '', 'https://api.github.com/users/ivanayov/followers', 'https://api.github.com/users/ivanayov/following{/other_user}', 'https://api.github.com/users/ivanayov/gists{/gist_id}', 'https://api.github.com/users/ivanayov/starred{/owner}{/repo}', 'https://api.github.com/users/ivanayov/subscriptions', 'https://api.github.com/users/ivanayov/orgs', 'https://api.github.com/users/ivanayov/repos', 'https://api.github.com/users/ivanayov/events{/privacy}', 'https://api.github.com/users/ivanayov/received_events', 'User', 'false', NULL, NULL, NULL, NULL, NULL, NULL, 'Pr Task', '2.0', 'Github API', '2022-08-05 09:06:39', :cntrb_id);""") connection.execute(statement, **data) @@ -106,7 +106,7 @@ def test_insert_data_with_updates(test_db_engine): with test_db_engine.connect() as connection: - statement = s.sql.text("""INSERT INTO "augur_data"."contributors" ("cntrb_login", 
"cntrb_email", "cntrb_full_name", "cntrb_company", "cntrb_created_at", "cntrb_type", "cntrb_fake", "cntrb_deleted", "cntrb_long", "cntrb_lat", "cntrb_country_code", "cntrb_state", "cntrb_city", "cntrb_location", "cntrb_canonical", "cntrb_last_used", "gh_user_id", "gh_login", "gh_url", "gh_html_url", "gh_node_id", "gh_avatar_url", "gh_gravatar_id", "gh_followers_url", "gh_following_url", "gh_gists_url", "gh_starred_url", "gh_subscriptions_url", "gh_organizations_url", "gh_repos_url", "gh_events_url", "gh_received_events_url", "gh_type", "gh_site_admin", "gl_web_url", "gl_avatar_url", "gl_state", "gl_username", "gl_full_name", "gl_id", "tool_source", "tool_version", "data_source", "data_collection_date", "cntrb_id") VALUES (:cntrb_login, NULL, NULL, NULL, NULL, NULL, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, :gh_user_id, :gh_login, 'https://api.github.com/users/ivanayov', 'https://github.com/ivanayov', 'MDQ6VXNlcjQxNjAxMzM=', 'https://avatars.githubusercontent.com/u/4160133?v=4', '', 'https://api.github.com/users/ivanayov/followers', 'https://api.github.com/users/ivanayov/following{/other_user}', 'https://api.github.com/users/ivanayov/gists{/gist_id}', 'https://api.github.com/users/ivanayov/starred{/owner}{/repo}', 'https://api.github.com/users/ivanayov/subscriptions', 'https://api.github.com/users/ivanayov/orgs', 'https://api.github.com/users/ivanayov/repos', 'https://api.github.com/users/ivanayov/events{/privacy}', 'https://api.github.com/users/ivanayov/received_events', 'User', 'false', NULL, NULL, NULL, NULL, NULL, NULL, 'Pr Task', '2.0', 'Github API', '2022-08-05 09:06:39', :cntrb_id);""") + statement = s.sql.text("""INSERT INTO "collection_data"."contributors" ("cntrb_login", "cntrb_email", "cntrb_full_name", "cntrb_company", "cntrb_created_at", "cntrb_type", "cntrb_fake", "cntrb_deleted", "cntrb_long", "cntrb_lat", "cntrb_country_code", "cntrb_state", "cntrb_city", "cntrb_location", "cntrb_canonical", "cntrb_last_used", "gh_user_id", "gh_login", 
"gh_url", "gh_html_url", "gh_node_id", "gh_avatar_url", "gh_gravatar_id", "gh_followers_url", "gh_following_url", "gh_gists_url", "gh_starred_url", "gh_subscriptions_url", "gh_organizations_url", "gh_repos_url", "gh_events_url", "gh_received_events_url", "gh_type", "gh_site_admin", "gl_web_url", "gl_avatar_url", "gl_state", "gl_username", "gl_full_name", "gl_id", "tool_source", "tool_version", "data_source", "data_collection_date", "cntrb_id") VALUES (:cntrb_login, NULL, NULL, NULL, NULL, NULL, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, :gh_user_id, :gh_login, 'https://api.github.com/users/ivanayov', 'https://github.com/ivanayov', 'MDQ6VXNlcjQxNjAxMzM=', 'https://avatars.githubusercontent.com/u/4160133?v=4', '', 'https://api.github.com/users/ivanayov/followers', 'https://api.github.com/users/ivanayov/following{/other_user}', 'https://api.github.com/users/ivanayov/gists{/gist_id}', 'https://api.github.com/users/ivanayov/starred{/owner}{/repo}', 'https://api.github.com/users/ivanayov/subscriptions', 'https://api.github.com/users/ivanayov/orgs', 'https://api.github.com/users/ivanayov/repos', 'https://api.github.com/users/ivanayov/events{/privacy}', 'https://api.github.com/users/ivanayov/received_events', 'User', 'false', NULL, NULL, NULL, NULL, NULL, NULL, 'Pr Task', '2.0', 'Github API', '2022-08-05 09:06:39', :cntrb_id);""") connection.execute(statement, **data_1) @@ -172,7 +172,7 @@ def test_insert_data_partial_update(test_db_engine): try: with test_db_engine.connect() as connection: - statement = s.sql.text("""INSERT INTO "augur_data"."contributors" ("cntrb_login", "cntrb_email", "cntrb_full_name", "cntrb_company", "cntrb_created_at", "cntrb_type", "cntrb_fake", "cntrb_deleted", "cntrb_long", "cntrb_lat", "cntrb_country_code", "cntrb_state", "cntrb_city", "cntrb_location", "cntrb_canonical", "cntrb_last_used", "gh_user_id", "gh_login", "gh_url", "gh_html_url", "gh_node_id", "gh_avatar_url", "gh_gravatar_id", "gh_followers_url", "gh_following_url", 
"gh_gists_url", "gh_starred_url", "gh_subscriptions_url", "gh_organizations_url", "gh_repos_url", "gh_events_url", "gh_received_events_url", "gh_type", "gh_site_admin", "gl_web_url", "gl_avatar_url", "gl_state", "gl_username", "gl_full_name", "gl_id", "tool_source", "tool_version", "data_source", "data_collection_date", "cntrb_id") VALUES (:cntrb_login, NULL, NULL, NULL, NULL, NULL, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, :gh_user_id, :gh_login, 'https://api.github.com/users/ivanayov', 'https://github.com/ivanayov', 'MDQ6VXNlcjQxNjAxMzM=', 'https://avatars.githubusercontent.com/u/4160133?v=4', '', 'https://api.github.com/users/ivanayov/followers', 'https://api.github.com/users/ivanayov/following{/other_user}', 'https://api.github.com/users/ivanayov/gists{/gist_id}', 'https://api.github.com/users/ivanayov/starred{/owner}{/repo}', 'https://api.github.com/users/ivanayov/subscriptions', 'https://api.github.com/users/ivanayov/orgs', 'https://api.github.com/users/ivanayov/repos', 'https://api.github.com/users/ivanayov/events{/privacy}', 'https://api.github.com/users/ivanayov/received_events', 'User', 'false', NULL, NULL, NULL, NULL, NULL, NULL, 'Pr Task', '2.0', 'Github API', '2022-08-05 09:06:39', :cntrb_id);""") + statement = s.sql.text("""INSERT INTO "collection_data"."contributors" ("cntrb_login", "cntrb_email", "cntrb_full_name", "cntrb_company", "cntrb_created_at", "cntrb_type", "cntrb_fake", "cntrb_deleted", "cntrb_long", "cntrb_lat", "cntrb_country_code", "cntrb_state", "cntrb_city", "cntrb_location", "cntrb_canonical", "cntrb_last_used", "gh_user_id", "gh_login", "gh_url", "gh_html_url", "gh_node_id", "gh_avatar_url", "gh_gravatar_id", "gh_followers_url", "gh_following_url", "gh_gists_url", "gh_starred_url", "gh_subscriptions_url", "gh_organizations_url", "gh_repos_url", "gh_events_url", "gh_received_events_url", "gh_type", "gh_site_admin", "gl_web_url", "gl_avatar_url", "gl_state", "gl_username", "gl_full_name", "gl_id", "tool_source", 
"tool_version", "data_source", "data_collection_date", "cntrb_id") VALUES (:cntrb_login, NULL, NULL, NULL, NULL, NULL, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, :gh_user_id, :gh_login, 'https://api.github.com/users/ivanayov', 'https://github.com/ivanayov', 'MDQ6VXNlcjQxNjAxMzM=', 'https://avatars.githubusercontent.com/u/4160133?v=4', '', 'https://api.github.com/users/ivanayov/followers', 'https://api.github.com/users/ivanayov/following{/other_user}', 'https://api.github.com/users/ivanayov/gists{/gist_id}', 'https://api.github.com/users/ivanayov/starred{/owner}{/repo}', 'https://api.github.com/users/ivanayov/subscriptions', 'https://api.github.com/users/ivanayov/orgs', 'https://api.github.com/users/ivanayov/repos', 'https://api.github.com/users/ivanayov/events{/privacy}', 'https://api.github.com/users/ivanayov/received_events', 'User', 'false', NULL, NULL, NULL, NULL, NULL, NULL, 'Pr Task', '2.0', 'Github API', '2022-08-05 09:06:39', :cntrb_id);""") connection.execute(statement, **data_1) @@ -210,11 +210,11 @@ def test_insert_issue_data_with_invalid_strings(test_db_engine): # insert the cntrb_id and cntrb_login into the contributors table so the contributor is present. 
# This is so we don't get a foreign key error on the cntrb_id when we insert the prs query = s.sql.text(""" - DELETE FROM "augur_data"."repo"; - DELETE FROM "augur_data"."repo_groups"; - INSERT INTO "augur_data"."repo_groups" ("repo_group_id", "rg_name", "rg_description", "rg_website", "rg_recache", "rg_last_modified", "rg_type", "tool_source", "tool_version", "data_source", "data_collection_date") VALUES (1, 'Default Repo Group', 'The default repo group created by the schema generation script', '', 0, '2019-06-03 15:55:20', 'GitHub Organization', 'load', 'one', 'git', '2019-06-05 13:36:25'); + DELETE FROM "collection_data"."repo"; + DELETE FROM "collection_data"."repo_groups"; + INSERT INTO "collection_data"."repo_groups" ("repo_group_id", "rg_name", "rg_description", "rg_website", "rg_recache", "rg_last_modified", "rg_type", "tool_source", "tool_version", "data_source", "data_collection_date") VALUES (1, 'Default Repo Group', 'The default repo group created by the schema generation script', '', 0, '2019-06-03 15:55:20', 'GitHub Organization', 'load', 'one', 'git', '2019-06-05 13:36:25'); - INSERT INTO "augur_data"."repo" ("repo_id", "repo_group_id", "repo_git", "repo_path", "repo_name", "repo_added", "repo_type", "url", "owner_id", "description", "primary_language", "created_at", "forked_from", "updated_at", "repo_archived_date_collected", "repo_archived", "tool_source", "tool_version", "data_source", "data_collection_date") VALUES (1, 1, 'https://github.com/chaoss/collectoss', NULL, NULL, '2022-08-15 21:08:07', '', NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 'CLI', '1.0', 'Git', '2022-08-15 21:08:07'); + INSERT INTO "collection_data"."repo" ("repo_id", "repo_group_id", "repo_git", "repo_path", "repo_name", "repo_added", "repo_type", "url", "owner_id", "description", "primary_language", "created_at", "forked_from", "updated_at", "repo_archived_date_collected", "repo_archived", "tool_source", "tool_version", "data_source", "data_collection_date") VALUES 
(1, 1, 'https://github.com/chaoss/collectoss', NULL, NULL, '2022-08-15 21:08:07', '', NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 'CLI', '1.0', 'Git', '2022-08-15 21:08:07'); """) connection.execute(query) @@ -243,8 +243,8 @@ def test_insert_issue_data_with_invalid_strings(test_db_engine): connection.execute(""" DELETE FROM collection_data.issues; - DELETE FROM "augur_data"."repo"; - DELETE FROM "augur_data"."repo_groups"; + DELETE FROM "collection_data"."repo"; + DELETE FROM "collection_data"."repo_groups"; """) diff --git a/tests/test_application/test_repo_load_controller/helper.py b/tests/test_application/test_repo_load_controller/helper.py index 8138783f7..da23932ae 100644 --- a/tests/test_application/test_repo_load_controller/helper.py +++ b/tests/test_application/test_repo_load_controller/helper.py @@ -20,11 +20,11 @@ def get_delete_statement(schema, table): def get_repo_delete_statement(): - return get_delete_statement("augur_data", "repo") + return get_delete_statement("collection_data", "repo") def get_repo_group_delete_statement(): - return get_delete_statement("augur_data", "repo_groups") + return get_delete_statement("collection_data", "repo_groups") def get_user_delete_statement(): @@ -92,7 +92,7 @@ def add_keys_to_test_db(test_db_engine): def get_repo_insert_statement(repo_id, rg_id, repo_url="place holder url"): - return """INSERT INTO "augur_data"."repo" ("repo_id", "repo_group_id", "repo_git", "repo_path", "repo_name", "repo_added", "repo_type", "url", "owner_id", "description", "primary_language", "created_at", "forked_from", "updated_at", "repo_archived_date_collected", "repo_archived", "tool_source", "tool_version", "data_source", "data_collection_date") VALUES ({}, {}, '{}', NULL, NULL, '2022-08-15 21:08:07', '', NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 'CLI', '1.0', 'Git', '2022-08-15 21:08:07');""".format(repo_id, rg_id, repo_url) + return """INSERT INTO "collection_data"."repo" ("repo_id", "repo_group_id", "repo_git", 
"repo_path", "repo_name", "repo_added", "repo_type", "url", "owner_id", "description", "primary_language", "created_at", "forked_from", "updated_at", "repo_archived_date_collected", "repo_archived", "tool_source", "tool_version", "data_source", "data_collection_date") VALUES ({}, {}, '{}', NULL, NULL, '2022-08-15 21:08:07', '', NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 'CLI', '1.0', 'Git', '2022-08-15 21:08:07');""".format(repo_id, rg_id, repo_url) def get_user_repo_insert_statement(repo_id, group_id): @@ -100,7 +100,7 @@ def get_user_repo_insert_statement(repo_id, group_id): def get_repo_group_insert_statement(rg_id): - return """INSERT INTO "augur_data"."repo_groups" ("repo_group_id", "rg_name", "rg_description", "rg_website", "rg_recache", "rg_last_modified", "rg_type", "tool_source", "tool_version", "data_source", "data_collection_date") VALUES ({}, 'Default Repo Group', 'The default repo group created by the schema generation script', '', 0, '2019-06-03 15:55:20', 'GitHub Organization', 'load', 'one', 'git', '2019-06-05 13:36:25');""".format(rg_id) + return """INSERT INTO "collection_data"."repo_groups" ("repo_group_id", "rg_name", "rg_description", "rg_website", "rg_recache", "rg_last_modified", "rg_type", "tool_source", "tool_version", "data_source", "data_collection_date") VALUES ({}, 'Default Repo Group', 'The default repo group created by the schema generation script', '', 0, '2019-06-03 15:55:20', 'GitHub Organization', 'load', 'one', 'git', '2019-06-05 13:36:25');""".format(rg_id) def get_user_insert_statement(user_id, username="bil", email="default@gmail.com", password="pass"): @@ -119,7 +119,7 @@ def get_user_group_insert_statement(user_id, group_name, group_id=None): def get_repos(connection, where_string=None): query_list = [] - query_list.append('SELECT * FROM "augur_data"."repo"') + query_list.append('SELECT * FROM "collection_data"."repo"') if where_string: if where_string.endswith(";"): diff --git 
a/tests/test_application/test_repo_load_controller/util.py b/tests/test_application/test_repo_load_controller/util.py index 305d9acf7..887dbf617 100644 --- a/tests/test_application/test_repo_load_controller/util.py +++ b/tests/test_application/test_repo_load_controller/util.py @@ -6,11 +6,11 @@ def get_delete_statement(schema, table): def get_repo_delete_statement(): - return get_delete_statement("augur_data", "repo") + return get_delete_statement("collection_data", "repo") def get_repo_group_delete_statement(): - return get_delete_statement("augur_data", "repo_groups") + return get_delete_statement("collection_data", "repo_groups") def get_user_delete_statement(): @@ -78,11 +78,11 @@ def add_keys_to_test_db(test_db_engine): def get_repo_insert_statement(repo_id, rg_id, repo_url="place holder url"): - return """INSERT INTO "augur_data"."repo" ("repo_id", "repo_group_id", "repo_git", "repo_path", "repo_name", "repo_added", "repo_type", "url", "owner_id", "description", "primary_language", "created_at", "forked_from", "updated_at", "repo_archived_date_collected", "repo_archived", "tool_source", "tool_version", "data_source", "data_collection_date") VALUES ({}, {}, '{}', NULL, NULL, '2022-08-15 21:08:07', '', NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 'CLI', '1.0', 'Git', '2022-08-15 21:08:07');""".format(repo_id, rg_id, repo_url) + return """INSERT INTO "collection_data"."repo" ("repo_id", "repo_group_id", "repo_git", "repo_path", "repo_name", "repo_added", "repo_type", "url", "owner_id", "description", "primary_language", "created_at", "forked_from", "updated_at", "repo_archived_date_collected", "repo_archived", "tool_source", "tool_version", "data_source", "data_collection_date") VALUES ({}, {}, '{}', NULL, NULL, '2022-08-15 21:08:07', '', NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 'CLI', '1.0', 'Git', '2022-08-15 21:08:07');""".format(repo_id, rg_id, repo_url) def get_repo_group_insert_statement(rg_id): - return """INSERT INTO 
"augur_data"."repo_groups" ("repo_group_id", "rg_name", "rg_description", "rg_website", "rg_recache", "rg_last_modified", "rg_type", "tool_source", "tool_version", "data_source", "data_collection_date") VALUES ({}, 'Default Repo Group', 'The default repo group created by the schema generation script', '', 0, '2019-06-03 15:55:20', 'GitHub Organization', 'load', 'one', 'git', '2019-06-05 13:36:25');""".format(rg_id) + return """INSERT INTO "collection_data"."repo_groups" ("repo_group_id", "rg_name", "rg_description", "rg_website", "rg_recache", "rg_last_modified", "rg_type", "tool_source", "tool_version", "data_source", "data_collection_date") VALUES ({}, 'Default Repo Group', 'The default repo group created by the schema generation script', '', 0, '2019-06-03 15:55:20', 'GitHub Organization', 'load', 'one', 'git', '2019-06-05 13:36:25');""".format(rg_id) def get_user_insert_statement(user_id): @@ -101,7 +101,7 @@ def get_user_group_insert_statement(user_id, group_name, group_id=None): def get_repos(connection, where_string=None): query_list = [] - query_list.append('SELECT * FROM "augur_data"."repo"') + query_list.append('SELECT * FROM "collection_data"."repo"') if where_string: if where_string.endswith(";"): diff --git a/tests/test_tasks/test_github_tasks/test_pull_requests.py b/tests/test_tasks/test_github_tasks/test_pull_requests.py index 83751ea22..0f70a64b0 100644 --- a/tests/test_tasks/test_github_tasks/test_pull_requests.py +++ b/tests/test_tasks/test_github_tasks/test_pull_requests.py @@ -312,13 +312,13 @@ def test_insert_prs(github_api_key_headers, test_db_session, repo): # insert the cntrb_id and cntrb_login into the contributors table so the contributor is present. 
# This is so we don't get a foreign key error on the cntrb_id when we insert the prs - query = text("""INSERT INTO "augur_data"."contributors" ("cntrb_login", "cntrb_email", "cntrb_full_name", "cntrb_company", "cntrb_created_at", "cntrb_type", "cntrb_fake", "cntrb_deleted", "cntrb_long", "cntrb_lat", "cntrb_country_code", "cntrb_state", "cntrb_city", "cntrb_location", "cntrb_canonical", "cntrb_last_used", "gh_user_id", "gh_login", "gh_url", "gh_html_url", "gh_node_id", "gh_avatar_url", "gh_gravatar_id", "gh_followers_url", "gh_following_url", "gh_gists_url", "gh_starred_url", "gh_subscriptions_url", "gh_organizations_url", "gh_repos_url", "gh_events_url", "gh_received_events_url", "gh_type", "gh_site_admin", "gl_web_url", "gl_avatar_url", "gl_state", "gl_username", "gl_full_name", "gl_id", "tool_source", "tool_version", "data_source", "data_collection_date", "cntrb_id") VALUES (:cntrb_login, 'kannayoshihiro@gmail.com', 'KANNA Yoshihiro', 'UTMC', '2009-04-17 12:43:58', NULL, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, 'kannayoshihiro@gmail.com', '2021-01-28 21:56:10-06', 74832, :gh_login, 'https://api.github.com/users/nan', 'https://github.com/nan', 'MDQ6VXNlcjc0ODMy', 'https://avatars.githubusercontent.com/u/74832?v=4', '', 'https://api.github.com/users/nan/followers', 'https://api.github.com/users/nan/following{/other_user}', 'https://api.github.com/users/nan/gists{/gist_id}', 'https://api.github.com/users/nan/starred{/owner}{/repo}', 'https://api.github.com/users/nan/subscriptions', 'https://api.github.com/users/nan/orgs', 'https://api.github.com/users/nan/repos', 'https://api.github.com/users/nan/events{/privacy}', 'https://api.github.com/users/nan/received_events', 'User', 'false', NULL, NULL, NULL, NULL, NULL, NULL, 'GitHub API Worker', '1.0.0', 'GitHub API', '2021-10-28 15:23:46', :cntrb_id); + query = text("""INSERT INTO "collection_data"."contributors" ("cntrb_login", "cntrb_email", "cntrb_full_name", "cntrb_company", "cntrb_created_at", "cntrb_type", 
"cntrb_fake", "cntrb_deleted", "cntrb_long", "cntrb_lat", "cntrb_country_code", "cntrb_state", "cntrb_city", "cntrb_location", "cntrb_canonical", "cntrb_last_used", "gh_user_id", "gh_login", "gh_url", "gh_html_url", "gh_node_id", "gh_avatar_url", "gh_gravatar_id", "gh_followers_url", "gh_following_url", "gh_gists_url", "gh_starred_url", "gh_subscriptions_url", "gh_organizations_url", "gh_repos_url", "gh_events_url", "gh_received_events_url", "gh_type", "gh_site_admin", "gl_web_url", "gl_avatar_url", "gl_state", "gl_username", "gl_full_name", "gl_id", "tool_source", "tool_version", "data_source", "data_collection_date", "cntrb_id") VALUES (:cntrb_login, 'kannayoshihiro@gmail.com', 'KANNA Yoshihiro', 'UTMC', '2009-04-17 12:43:58', NULL, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, 'kannayoshihiro@gmail.com', '2021-01-28 21:56:10-06', 74832, :gh_login, 'https://api.github.com/users/nan', 'https://github.com/nan', 'MDQ6VXNlcjc0ODMy', 'https://avatars.githubusercontent.com/u/74832?v=4', '', 'https://api.github.com/users/nan/followers', 'https://api.github.com/users/nan/following{/other_user}', 'https://api.github.com/users/nan/gists{/gist_id}', 'https://api.github.com/users/nan/starred{/owner}{/repo}', 'https://api.github.com/users/nan/subscriptions', 'https://api.github.com/users/nan/orgs', 'https://api.github.com/users/nan/repos', 'https://api.github.com/users/nan/events{/privacy}', 'https://api.github.com/users/nan/received_events', 'User', 'false', NULL, NULL, NULL, NULL, NULL, NULL, 'GitHub API Worker', '1.0.0', 'GitHub API', '2021-10-28 15:23:46', :cntrb_id); - DELETE FROM "augur_data"."repo"; - DELETE FROM "augur_data"."repo_groups"; - INSERT INTO "augur_data"."repo_groups" ("repo_group_id", "rg_name", "rg_description", "rg_website", "rg_recache", "rg_last_modified", "rg_type", "tool_source", "tool_version", "data_source", "data_collection_date") VALUES (1, 'Default Repo Group', 'The default repo group created by the schema generation script', '', 0, '2019-06-03 
15:55:20', 'GitHub Organization', 'load', 'one', 'git', '2019-06-05 13:36:25'); + DELETE FROM "collection_data"."repo"; + DELETE FROM "collection_data"."repo_groups"; + INSERT INTO "collection_data"."repo_groups" ("repo_group_id", "rg_name", "rg_description", "rg_website", "rg_recache", "rg_last_modified", "rg_type", "tool_source", "tool_version", "data_source", "data_collection_date") VALUES (1, 'Default Repo Group', 'The default repo group created by the schema generation script', '', 0, '2019-06-03 15:55:20', 'GitHub Organization', 'load', 'one', 'git', '2019-06-05 13:36:25'); - INSERT INTO "augur_data"."repo" ("repo_id", "repo_group_id", "repo_git", "repo_path", "repo_name", "repo_added", "repo_type", "url", "owner_id", "description", "primary_language", "created_at", "forked_from", "updated_at", "repo_archived_date_collected", "repo_archived", "tool_source", "tool_version", "data_source", "data_collection_date") VALUES (1, 1, 'https://github.com/chaoss/collectoss', NULL, NULL, '2022-08-15 21:08:07', '', NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 'CLI', '1.0', 'Git', '2022-08-15 21:08:07'); + INSERT INTO "collection_data"."repo" ("repo_id", "repo_group_id", "repo_git", "repo_path", "repo_name", "repo_added", "repo_type", "url", "owner_id", "description", "primary_language", "created_at", "forked_from", "updated_at", "repo_archived_date_collected", "repo_archived", "tool_source", "tool_version", "data_source", "data_collection_date") VALUES (1, 1, 'https://github.com/chaoss/collectoss', NULL, NULL, '2022-08-15 21:08:07', '', NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 'CLI', '1.0', 'Git', '2022-08-15 21:08:07'); """) connection.execute(query, **contributor) @@ -354,8 +354,8 @@ def test_insert_prs(github_api_key_headers, test_db_session, repo): with test_db_session.engine.connect() as connection: connection.execute(f"DELETE FROM collection_data.pull_requests;") - connection.execute("""DELETE FROM "augur_data"."repo"; - DELETE FROM 
"augur_data"."repo_groups"; + connection.execute("""DELETE FROM "collection_data"."repo"; + DELETE FROM "collection_data"."repo_groups"; """) connection.execute(f"DELETE FROM collection_data.contributors WHERE cntrb_id!='{not_provided_cntrb_id}' AND cntrb_id!='{nan_cntrb_id}';") diff --git a/tests/test_workers/test_facade/test_facade_contributor_interface/test_endpoints.py b/tests/test_workers/test_facade/test_facade_contributor_interface/test_endpoints.py index 14bd8bfe5..43b1a9524 100644 --- a/tests/test_workers/test_facade/test_facade_contributor_interface/test_endpoints.py +++ b/tests/test_workers/test_facade/test_facade_contributor_interface/test_endpoints.py @@ -13,7 +13,7 @@ def set_up_repo_groups(database_connection): repo_group_IDs = df['repo_group_id'].values.tolist() insert_repo_group_sql = s.sql.text(""" - INSERT INTO "augur_data"."repo_groups"("repo_group_id", "rg_name", "rg_description", "rg_website", "rg_recache", "rg_last_modified", "rg_type", "tool_source", "tool_version", "data_source", "data_collection_date") VALUES (:repo_group_id, :repo_group_name, '', '', 0, CURRENT_TIMESTAMP, 'Unknown', 'Loaded by user', '1.0', 'Git', CURRENT_TIMESTAMP); + INSERT INTO "collection_data"."repo_groups"("repo_group_id", "rg_name", "rg_description", "rg_website", "rg_recache", "rg_last_modified", "rg_type", "tool_source", "tool_version", "data_source", "data_collection_date") VALUES (:repo_group_id, :repo_group_name, '', '', 0, CURRENT_TIMESTAMP, 'Unknown', 'Loaded by user', '1.0', 'Git', CURRENT_TIMESTAMP); """) with open("tests/test_workers/test_facade/test_facade_contributor_interface/test_repo_groups.csv") as create_repo_groups_file: diff --git a/tests/test_workers/test_set_up_fixtures.py b/tests/test_workers/test_set_up_fixtures.py index fa4496cd8..82787ca97 100644 --- a/tests/test_workers/test_set_up_fixtures.py +++ b/tests/test_workers/test_set_up_fixtures.py @@ -12,7 +12,7 @@ def poll_database_connection(database_string): print("Attempting to create db 
engine") db = s.create_engine(database_string, poolclass=s.pool.NullPool, - connect_args={'options': '-csearch_path={}'.format('augur_data')}) + connect_args={'options': '-csearch_path={}'.format('collection_data')}) return db From eaa60572deac5a133c741285e058329b2502b923 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 27 May 2026 17:19:47 -0400 Subject: [PATCH 23/26] change augur_data references in docs files Signed-off-by: Adrian Edwards --- docs/source/getting-started/command-line-interface/db.rst | 4 ++-- docs/source/schema/overview.rst | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/source/getting-started/command-line-interface/db.rst b/docs/source/getting-started/command-line-interface/db.rst index 9b8079f11..6401658d1 100644 --- a/docs/source/getting-started/command-line-interface/db.rst +++ b/docs/source/getting-started/command-line-interface/db.rst @@ -167,10 +167,10 @@ Example usage\: > [INFO] Config file loaded successfully > CLI: [db.check_pgpass_credentials] [INFO] Credentials found in $HOME/.pgpass > CLI: [db.upgrade_db_version] [INFO] Upgrading from 16 to 17 - > ALTER TABLE "augur_data"."repo" + > ALTER TABLE "collection_data"."repo" > ALTER COLUMN "forked_from" TYPE varchar USING "forked_from"::varchar; > ALTER TABLE - > ALTER TABLE "augur_data"."repo" + > ALTER TABLE "collection_data"."repo" > ADD COLUMN "repo_archived" int4, > ADD COLUMN "repo_archived_date_collected" timestamptz(0), > ALTER COLUMN "forked_from" TYPE varchar USING "forked_from"::varchar; diff --git a/docs/source/schema/overview.rst b/docs/source/schema/overview.rst index d065b64df..d82d83b70 100644 --- a/docs/source/schema/overview.rst +++ b/docs/source/schema/overview.rst @@ -35,7 +35,7 @@ Schema Overview CollectOSS Data ------------------------------------------------------- -The ``augur_data`` schema contains *most* of the information analyzed +The ``collection_data`` schema contains *most* of the information analyzed and constructed by 
CollectOSS. The origin’s of the data inside of collectoss are from data collection tasks and populate this schema.: @@ -63,7 +63,7 @@ CollectOSS Operations The ``collection_operations`` tables are where most of the operations tables exist. There are a few, like ``settings`` that remain in -``augur_data`` for now, but will be moved. They keep records related to +``collection_data`` for now, but will be moved. They keep records related to analytical history and data provenance for data in the schema. They also store information including API keys. From 488b8a83958d8ff3cf060eb3d9c6a366e70140fc Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 27 May 2026 17:19:53 -0400 Subject: [PATCH 24/26] change augur_data references in the CLI Signed-off-by: Adrian Edwards --- collectoss/application/cli/db.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/collectoss/application/cli/db.py b/collectoss/application/cli/db.py index 10e830735..d2514eb65 100644 --- a/collectoss/application/cli/db.py +++ b/collectoss/application/cli/db.py @@ -186,7 +186,7 @@ def add_repo_groups(ctx: click.Context, filename: str) -> None: insert_repo_group_sql = s.sql.text( """ - INSERT INTO "augur_data"."repo_groups"("repo_group_id", "rg_name", "rg_description", "rg_website", "rg_recache", "rg_last_modified", "rg_type", "tool_source", "tool_version", "data_source", "data_collection_date") VALUES (:repo_group_id, :repo_group_name, '', '', 0, CURRENT_TIMESTAMP, 'Unknown', 'Loaded by user', '1.0', 'Git', CURRENT_TIMESTAMP); + INSERT INTO "collectoss_data"."repo_groups"("repo_group_id", "rg_name", "rg_description", "rg_website", "rg_recache", "rg_last_modified", "rg_type", "tool_source", "tool_version", "data_source", "data_collection_date") VALUES (:repo_group_id, :repo_group_name, '', '', 0, CURRENT_TIMESTAMP, 'Unknown', 'Loaded by user', '1.0', 'Git', CURRENT_TIMESTAMP); """ ) From fca0f630034a0d56691c99b766999f57995b3d3f Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Fri, 
29 May 2026 11:51:12 -0400 Subject: [PATCH 25/26] fix typo (its collection_data schema) Signed-off-by: Adrian Edwards --- collectoss/application/cli/db.py | 2 +- docs/source/quick-start.rst | 2 +- docs/source/schema/regularly_used_data.rst | 2 +- tests/test_helpers.py | 2 +- tests/test_workers/test_set_up_fixtures.py | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/collectoss/application/cli/db.py b/collectoss/application/cli/db.py index d2514eb65..bff4f859f 100644 --- a/collectoss/application/cli/db.py +++ b/collectoss/application/cli/db.py @@ -186,7 +186,7 @@ def add_repo_groups(ctx: click.Context, filename: str) -> None: insert_repo_group_sql = s.sql.text( """ - INSERT INTO "collectoss_data"."repo_groups"("repo_group_id", "rg_name", "rg_description", "rg_website", "rg_recache", "rg_last_modified", "rg_type", "tool_source", "tool_version", "data_source", "data_collection_date") VALUES (:repo_group_id, :repo_group_name, '', '', 0, CURRENT_TIMESTAMP, 'Unknown', 'Loaded by user', '1.0', 'Git', CURRENT_TIMESTAMP); + INSERT INTO "collection_data"."repo_groups"("repo_group_id", "rg_name", "rg_description", "rg_website", "rg_recache", "rg_last_modified", "rg_type", "tool_source", "tool_version", "data_source", "data_collection_date") VALUES (:repo_group_id, :repo_group_name, '', '', 0, CURRENT_TIMESTAMP, 'Unknown', 'Loaded by user', '1.0', 'Git', CURRENT_TIMESTAMP); """ ) diff --git a/docs/source/quick-start.rst b/docs/source/quick-start.rst index a5466272f..b30101902 100644 --- a/docs/source/quick-start.rst +++ b/docs/source/quick-start.rst @@ -139,7 +139,7 @@ http://servername-or-ip:port_number Note: CollectOSS will run on port 5000 by default (you probably need to -change that in collectoss_operations.config for OSX) +change that in collection_operations.config for OSX) Stopping your CollectOSS Instance --------------------------------- diff --git a/docs/source/schema/regularly_used_data.rst b/docs/source/schema/regularly_used_data.rst index 
14cdcb1f8..979c204c0 100644 --- a/docs/source/schema/regularly_used_data.rst +++ b/docs/source/schema/regularly_used_data.rst @@ -347,7 +347,7 @@ Repo_meta Repo_sbom_scans --------------- - This table links the collectoss_data schema to the collectoss_spdx schema to keep a list of repositories that need licenses scanned. (These are for file level license declarations, which are common in Linux Foundation projects, but otherwise not in wide use). + This table links the collection_data schema to the collectoss_spdx schema to keep a list of repositories that need licenses scanned. (These are for file level license declarations, which are common in Linux Foundation projects, but otherwise not in wide use). .. image:: images/repo_sbom_scans.png :width: 200 diff --git a/tests/test_helpers.py b/tests/test_helpers.py index dd850a23f..8ba765018 100644 --- a/tests/test_helpers.py +++ b/tests/test_helpers.py @@ -14,7 +14,7 @@ def set_search_path(dbapi_connection, connection_record): existing_autocommit = dbapi_connection.autocommit dbapi_connection.autocommit = True cursor = dbapi_connection.cursor() - cursor.execute("SET SESSION search_path=public,collectoss_data,collectoss_operations,spdx") + cursor.execute("SET SESSION search_path=public,collection_data,collection_operations,spdx") cursor.close() dbapi_connection.autocommit = existing_autocommit diff --git a/tests/test_workers/test_set_up_fixtures.py b/tests/test_workers/test_set_up_fixtures.py index 82787ca97..584c16745 100644 --- a/tests/test_workers/test_set_up_fixtures.py +++ b/tests/test_workers/test_set_up_fixtures.py @@ -153,7 +153,7 @@ def initialize_database_connections(self): "augur", "augur", "172.17.0.1", 5400, "test" ) - self.db_schema = 'collectoss_data' + self.db_schema = 'collection_data' self.helper_schema = 'collection_operations' self.helper_db = s.create_engine(DB_STR, poolclass=s.pool.NullPool, From 83ab6a4eb60e4c38b0c2514a24e78c50457a1a06 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Fri, 29 May 
2026 12:10:59 -0400 Subject: [PATCH 26/26] hardcode schema in migrations Signed-off-by: Adrian Edwards --- .../alembic/versions/31_update_pr_events_unique.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/collectoss/application/schema/alembic/versions/31_update_pr_events_unique.py b/collectoss/application/schema/alembic/versions/31_update_pr_events_unique.py index f6aeeca20..d903ae8fd 100644 --- a/collectoss/application/schema/alembic/versions/31_update_pr_events_unique.py +++ b/collectoss/application/schema/alembic/versions/31_update_pr_events_unique.py @@ -20,7 +20,7 @@ # conn = op.get_bind() # conn.execute(text(""" - # UPDATE pull_request_events + # UPDATE augur_data.pull_request_events # SET issue_event_src_id = substring(node_url FROM '.*/([0-9]+)$')::BIGINT; # """)) @@ -32,7 +32,7 @@ def upgrade(): with engine.connect() as conn: - result = conn.execute(text("SELECT COUNT(*) FROM pull_request_events WHERE issue_event_src_id=pr_platform_event_id")) + result = conn.execute(text("SELECT COUNT(*) FROM augur_data.pull_request_events WHERE issue_event_src_id=pr_platform_event_id")) total_rows = result.scalar() if total_rows != 0: print(f"Rows needing updated: {total_rows}") @@ -43,14 +43,14 @@ def upgrade(): result = conn.execute(text(""" WITH cte AS ( SELECT pr_event_id - FROM pull_request_events + FROM augur_data.pull_request_events WHERE issue_event_src_id=pr_platform_event_id LIMIT 250000 ) - UPDATE pull_request_events + UPDATE augur_data.pull_request_events SET issue_event_src_id = substring(node_url FROM '.*/([0-9]+)$')::BIGINT FROM cte - WHERE pull_request_events.pr_event_id = cte.pr_event_id + WHERE augur_data.pull_request_events.pr_event_id = cte.pr_event_id RETURNING 1; """)) @@ -77,7 +77,7 @@ def downgrade(): print("Please run in background. 
This downgrade will take a very *very* long time") conn = op.get_bind() conn.execute(text(""" - UPDATE pull_request_events + UPDATE augur_data.pull_request_events SET issue_event_src_id = pr_platform_event_id WHERE issue_event_src_id <> pr_platform_event_id; """)) \ No newline at end of file