Skip to content

Commit

Permalink
Add missing premium data points for organizations (#1374)
Browse files Browse the repository at this point in the history
Co-authored-by: Joana Maia <joana@crowd.dev>
Co-authored-by: Joan Reyero <joan@crowd.dev>
  • Loading branch information
3 people authored Aug 29, 2023
1 parent 165662c commit 30f1d63
Show file tree
Hide file tree
Showing 70 changed files with 1,635 additions and 248 deletions.
3 changes: 2 additions & 1 deletion backend/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@
"script:process-webhook": "SERVICE=script TS_NODE_TRANSPILE_ONLY=true node -r tsconfig-paths/register -r ts-node/register src/bin/scripts/process-webhook.ts",
"script:send-weekly-analytics-email": "SERVICE=script TS_NODE_TRANSPILE_ONLY=true node -r tsconfig-paths/register -r ts-node/register src/bin/scripts/send-weekly-analytics-email.ts",
"script:unleash-init": "SERVICE=script TS_NODE_TRANSPILE_ONLY=true node -r tsconfig-paths/register -r ts-node/register src/bin/scripts/unleash-init.ts",
"script:enrich-members-organizations": "SERVICE=script TS_NODE_TRANSPILE_ONLY=true node -r tsconfig-paths/register -r ts-node/register src/bin/scripts/enrich-members-and-organizations.ts"
"script:enrich-members-organizations": "SERVICE=script TS_NODE_TRANSPILE_ONLY=true node -r tsconfig-paths/register -r ts-node/register src/bin/scripts/enrich-members-and-organizations.ts",
"script:enrich-organizations": "SERVICE=script TS_NODE_TRANSPILE_ONLY=true node -r tsconfig-paths/register -r ts-node/register src/bin/scripts/enrichOrganizationsSynchronous.ts"
},
"dependencies": {
"@aws-sdk/client-comprehend": "^3.159.0",
Expand Down
3 changes: 0 additions & 3 deletions backend/src/api/components/member/examples.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,6 @@ components:
name: Pied Piper
url: https://piedpiper.io
description: The new internet
parentUrl: null
emails:
- richard@piedpiper.io
- hello@piedpiper.io
Expand Down Expand Up @@ -337,7 +336,6 @@ components:
name: Pied Piper
url: https://piedpiper.io
description: The new internet
parentUrl: null
emails:
- richard@piedpiper.io
- hello@piedpiper.io
Expand Down Expand Up @@ -451,7 +449,6 @@ components:
name: Pied Piper
url: https://piedpiper.io
description: The new internet
parentUrl: null
emails:
- richard@piedpiper.io
- hello@piedpiper.io
Expand Down
3 changes: 0 additions & 3 deletions backend/src/api/components/organization/examples.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ components:
name: Pied Piper
url: https://piedpiper.io
description: The new internet
parentUrl: null
emails:
- richard@piedpiper.io
- hello@piedpiper.io
Expand Down Expand Up @@ -46,7 +45,6 @@ components:
name: Pied Piper
url: https://piedpiper.io
description: The new internet
parentUrl: null
emails:
- richard@piedpiper.io
- hello@piedpiper.io
Expand Down Expand Up @@ -93,7 +91,6 @@ components:
name: Hooli
url: https://hooli.xyz
description: Hooli is an international corporation founded by Gavin Belson and Peter Gregory
parentUrl: null
emails:
- gavin@hooli.xyz
phoneNumbers: null
Expand Down
60 changes: 60 additions & 0 deletions backend/src/bin/scripts/enrichOrganizationsSynchronous.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
import commandLineArgs from 'command-line-args'
import commandLineUsage from 'command-line-usage'
import * as fs from 'fs'
import path from 'path'
import { getServiceLogger } from '@crowd/logging'
import { BulkorganizationEnrichmentWorker } from '@/serverless/microservices/nodejs/bulk-enrichment/bulkOrganizationEnrichmentWorker'

/* eslint-disable no-console */

// Banner text shown verbatim at the top of the usage guide; loaded from the
// banner.txt file that sits next to this script.
const banner = fs.readFileSync(path.join(__dirname, 'banner.txt'), 'utf8')

const log = getServiceLogger()

/**
 * Command-line flags accepted by this script. The same list drives both the
 * argument parser and the "Options" section of the usage guide.
 */
const options = [
  {
    name: 'tenant',
    alias: 't',
    type: String,
    // The value is split on ',' further down, so several tenants can be passed at once.
    description:
      'The unique ID of the tenant whose organizations you would like to enrich. Accepts a comma-separated list of IDs.',
  },
  {
    name: 'help',
    alias: 'h',
    type: Boolean,
    description: 'Print this usage guide.',
  },
]

/**
 * Sections of the usage guide. This script only runs the organization
 * enrichment worker, so the text describes organizations only.
 */
const sections = [
  {
    content: banner,
    raw: true,
  },
  {
    header: 'Enrich organizations of the tenant',
    content: 'Enrich all enrichable organizations of the tenant',
  },
  {
    header: 'Options',
    optionList: options,
  },
]

const usage = commandLineUsage(sections)
const parameters = commandLineArgs(options)

// Entry point: print the usage guide, or enrich organizations for every tenant given.
// `tenant` is the only value option this script declares, so it is the only
// input that can be required here.
if (parameters.help || !parameters.tenant) {
  console.log(usage)
} else {
  setImmediate(async () => {
    try {
      // Accept a comma-separated list; drop blanks such as those produced by "a,,b" or "a, b,".
      const tenantIds: string[] = parameters.tenant
        .split(',')
        .map((id: string) => id.trim())
        .filter((id: string) => id.length > 0)

      // NOTE(review): the meaning of `limit` and of the trailing `true` flag is
      // defined by BulkorganizationEnrichmentWorker, which is not visible here — confirm.
      const limit = 3

      // Tenants are processed one after another: each worker call is awaited
      // before the next one starts.
      for (const tenantId of tenantIds) {
        await BulkorganizationEnrichmentWorker(tenantId, limit, true)
        log.info(`Done for tenant ${tenantId}`)
      }

      process.exit(0)
    } catch (err) {
      // Without this handler a worker failure surfaces as an unhandled promise
      // rejection; log it and exit with a non-zero code so callers can detect it.
      log.error(err, 'Organization enrichment failed')
      process.exit(1)
    }
  })
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
-- Removes the premium enrichment columns from public."organizations" and
-- re-creates the "parentUrl" column on both "organizations" and "organizationCaches".
-- NOTE(review): this mirrors, in reverse, the sibling script that adds these
-- columns, so it is presumably the rollback migration — the file name is not
-- visible here, confirm.
-- Dropping a column discards its stored data; the re-added "parentUrl" columns start out NULL.
ALTER TABLE public."organizations" DROP COLUMN "affiliatedProfiles";
ALTER TABLE public."organizations" DROP COLUMN "allSubsidiaries";
ALTER TABLE public."organizations" DROP COLUMN "alternativeDomains";
ALTER TABLE public."organizations" DROP COLUMN "alternativeNames";
ALTER TABLE public."organizations" DROP COLUMN "averageEmployeeTenure";
ALTER TABLE public."organizations" DROP COLUMN "averageTenureByLevel";
ALTER TABLE public."organizations" DROP COLUMN "averageTenureByRole";
ALTER TABLE public."organizations" DROP COLUMN "directSubsidiaries";
ALTER TABLE public."organizations" DROP COLUMN "employeeChurnRate";
ALTER TABLE public."organizations" DROP COLUMN "employeeCountByMonth";
ALTER TABLE public."organizations" DROP COLUMN "employeeGrowthRate";
ALTER TABLE public."organizations" DROP COLUMN "employeeCountByMonthByLevel";
ALTER TABLE public."organizations" DROP COLUMN "employeeCountByMonthByRole";
ALTER TABLE public."organizations" DROP COLUMN "gicsSector";
ALTER TABLE public."organizations" DROP COLUMN "grossAdditionsByMonth";
ALTER TABLE public."organizations" DROP COLUMN "grossDeparturesByMonth";
ALTER TABLE public."organizations" DROP COLUMN "ultimateParent";
ALTER TABLE public."organizations" DROP COLUMN "immediateParent";
-- Restore the single "parentUrl" column on both tables.
ALTER TABLE public."organizations" ADD COLUMN "parentUrl" TEXT NULL;
ALTER TABLE public."organizationCaches" ADD COLUMN "parentUrl" TEXT NULL;
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
-- Adds the premium enrichment columns to public."organizations" and removes the
-- "parentUrl" column from both "organizations" and "organizationCaches".
-- Every new column is nullable: the TEXT[] columns carry no NOT NULL constraint
-- and the remaining columns are declared NULL explicitly.
-- List-valued attributes are stored as TEXT[]; structured attributes are stored as JSONB.
ALTER TABLE public."organizations" ADD COLUMN "affiliatedProfiles" TEXT[];
ALTER TABLE public."organizations" ADD COLUMN "allSubsidiaries" TEXT[];
ALTER TABLE public."organizations" ADD COLUMN "alternativeDomains" TEXT[];
ALTER TABLE public."organizations" ADD COLUMN "alternativeNames" TEXT[];
ALTER TABLE public."organizations" ADD COLUMN "averageEmployeeTenure" FLOAT NULL;
ALTER TABLE public."organizations" ADD COLUMN "averageTenureByLevel" JSONB NULL;
ALTER TABLE public."organizations" ADD COLUMN "averageTenureByRole" JSONB NULL;
ALTER TABLE public."organizations" ADD COLUMN "directSubsidiaries" TEXT[];
ALTER TABLE public."organizations" ADD COLUMN "employeeChurnRate" JSONB NULL;
ALTER TABLE public."organizations" ADD COLUMN "employeeCountByMonth" JSONB NULL;
ALTER TABLE public."organizations" ADD COLUMN "employeeGrowthRate" JSONB NULL;
ALTER TABLE public."organizations" ADD COLUMN "employeeCountByMonthByLevel" JSONB NULL;
ALTER TABLE public."organizations" ADD COLUMN "employeeCountByMonthByRole" JSONB NULL;
ALTER TABLE public."organizations" ADD COLUMN "gicsSector" TEXT NULL;
ALTER TABLE public."organizations" ADD COLUMN "grossAdditionsByMonth" JSONB NULL;
ALTER TABLE public."organizations" ADD COLUMN "grossDeparturesByMonth" JSONB NULL;
-- "ultimateParent" and "immediateParent" are added in the same script that drops "parentUrl".
-- NOTE(review): no data is copied from "parentUrl" into the new columns before
-- it is dropped, so existing values are discarded — confirm that is intended.
ALTER TABLE public."organizations" ADD COLUMN "ultimateParent" TEXT NULL;
ALTER TABLE public."organizations" ADD COLUMN "immediateParent" TEXT NULL;
ALTER TABLE public."organizations" DROP COLUMN "parentUrl";
ALTER TABLE public."organizationCaches" DROP COLUMN "parentUrl";
74 changes: 72 additions & 2 deletions backend/src/database/models/organization.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,11 @@ export default (sequelize) => {
type: DataTypes.TEXT,
allowNull: true,
},
parentUrl: {
immediateParent: {
type: DataTypes.TEXT,
allowNull: true,
},
ultimateParent: {
type: DataTypes.TEXT,
allowNull: true,
},
Expand Down Expand Up @@ -79,10 +83,12 @@ export default (sequelize) => {
employees: {
type: DataTypes.INTEGER,
allowNull: true,
comment: 'total employee count of the company',
},
revenueRange: {
type: DataTypes.JSONB,
allowNull: true,
comment: 'inferred revenue range of the company',
},
importHash: {
type: DataTypes.STRING(255),
Expand Down Expand Up @@ -131,7 +137,7 @@ export default (sequelize) => {
type: {
type: DataTypes.TEXT,
allowNull: true,
comment: "The comnapny's type. For example NGO",
comment: "The company's type. For example NGO",
},
employeeCountByCountry: {
type: DataTypes.JSONB,
Expand Down Expand Up @@ -159,6 +165,70 @@ export default (sequelize) => {
type: DataTypes.JSONB,
defaultValue: {},
},
affiliatedProfiles: {
type: DataTypes.ARRAY(DataTypes.TEXT),
allowNull: true,
},
allSubsidiaries: {
type: DataTypes.ARRAY(DataTypes.TEXT),
allowNull: true,
},
alternativeDomains: {
type: DataTypes.ARRAY(DataTypes.TEXT),
allowNull: true,
},
alternativeNames: {
type: DataTypes.ARRAY(DataTypes.TEXT),
allowNull: true,
},
averageEmployeeTenure: {
type: DataTypes.FLOAT,
allowNull: true,
},
averageTenureByLevel: {
type: DataTypes.JSONB,
allowNull: true,
},
averageTenureByRole: {
type: DataTypes.JSONB,
allowNull: true,
},
directSubsidiaries: {
type: DataTypes.ARRAY(DataTypes.TEXT),
allowNull: true,
},
employeeChurnRate: {
type: DataTypes.JSONB,
allowNull: true,
},
employeeCountByMonth: {
type: DataTypes.JSONB,
allowNull: true,
},
employeeGrowthRate: {
type: DataTypes.JSONB,
allowNull: true,
},
employeeCountByMonthByLevel: {
type: DataTypes.JSONB,
allowNull: true,
},
employeeCountByMonthByRole: {
type: DataTypes.JSONB,
allowNull: true,
},
gicsSector: {
type: DataTypes.TEXT,
allowNull: true,
},
grossAdditionsByMonth: {
type: DataTypes.JSONB,
allowNull: true,
},
grossDeparturesByMonth: {
type: DataTypes.JSONB,
allowNull: true,
},
},
{
indexes: [
Expand Down
4 changes: 0 additions & 4 deletions backend/src/database/models/organizationCache.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,6 @@ export default (sequelize) => {
type: DataTypes.TEXT,
allowNull: true,
},
parentUrl: {
type: DataTypes.TEXT,
allowNull: true,
},
emails: {
type: DataTypes.ARRAY(DataTypes.TEXT),
allowNull: true,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ const toCreate = {
phoneNumbers: ['+42 424242424'],
logo: 'https://logo.clearbit.com/crowd.dev',
tags: ['community', 'growth', 'developer-first'],
parentUrl: null,
website: 'https://crowd.dev',
location: 'Berlin',
github: {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ const toCreate = {
phoneNumbers: ['+42 424242424'],
logo: 'https://logo.clearbit.com/crowd.dev',
tags: ['community', 'growth', 'developer-first'],
parentUrl: null,
twitter: {
handle: 'CrowdDotDev',
id: '1362101830923259908',
Expand Down Expand Up @@ -53,6 +52,24 @@ const toCreate = {
address: null,
profiles: null,
manuallyCreated: false,
affiliatedProfiles: null,
allSubsidiaries: null,
alternativeDomains: null,
alternativeNames: null,
averageEmployeeTenure: null,
averageTenureByLevel: null,
averageTenureByRole: null,
directSubsidiaries: null,
employeeChurnRate: null,
employeeCountByMonth: null,
employeeGrowthRate: null,
employeeCountByMonthByLevel: null,
employeeCountByMonthByRole: null,
gicsSector: null,
grossAdditionsByMonth: null,
grossDeparturesByMonth: null,
ultimateParent: null,
immediateParent: null,
}

async function createMembers(options) {
Expand Down Expand Up @@ -558,7 +575,6 @@ describe('OrganizationRepository tests', () => {
phoneNumbers: ['+42 424242424'],
logo: 'https://logo.clearbit.com/crowd.dev',
tags: ['community', 'growth', 'developer-first'],
parentUrl: null,
twitter: {
handle: 'CrowdDotDev',
id: '1362101830923259908',
Expand Down Expand Up @@ -590,7 +606,6 @@ describe('OrganizationRepository tests', () => {
phoneNumbers: ['+42 54545454'],
logo: 'https://logo.clearbit.com/piedpiper',
tags: ['new-internet', 'compression'],
parentUrl: null,
twitter: {
handle: 'piedPiper',
id: '1362101830923259908',
Expand Down Expand Up @@ -622,7 +637,6 @@ describe('OrganizationRepository tests', () => {
phoneNumbers: ['+42 12121212'],
logo: 'https://logo.clearbit.com/hooli',
tags: ['not-google', 'elephant'],
parentUrl: null,
twitter: {
handle: 'hooli',
id: '1362101830923259908',
Expand Down
32 changes: 29 additions & 3 deletions backend/src/database/repositories/organizationCacheRepository.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ class OrganizationCacheRepository {
'name',
'url',
'description',
'parentUrl',
'emails',
'phoneNumbers',
'logo',
Expand Down Expand Up @@ -73,7 +72,6 @@ class OrganizationCacheRepository {
'name',
'url',
'description',
'parentUrl',
'emails',
'phoneNumbers',
'logo',
Expand Down Expand Up @@ -119,7 +117,35 @@ class OrganizationCacheRepository {
return this.findById(record.id, options)
}

static async bulkUpdate(data: any[], options: IRepositoryOptions): Promise<void> {
static async bulkUpdate(
data: any[],
options: IRepositoryOptions,
isEnrichment: boolean = false,
): Promise<void> {
const transaction = SequelizeRepository.getTransaction(options)

if (isEnrichment) {
// Fetch existing organizations
const existingRecords = await options.database.organizationCache.findAll({
where: {
id: {
[options.database.Sequelize.Op.in]: data.map((x) => x.id),
},
},
transaction,
})

// Merge existing tags with new tags instead of overwriting
data = data.map((org) => {
const existingOrg = existingRecords.find((record) => record.id === org.id)
if (existingOrg && existingOrg.tags) {
// Merge existing and new tags without duplicates
org.tags = lodash.uniq([...org.tags, ...existingOrg.tags])
}
return org
})
}

for (const org of data) {
this.update(org.id, org, options)
}
Expand Down
Loading

0 comments on commit 30f1d63

Please sign in to comment.