Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add missing premium data points for organizations #1374

Merged
merged 33 commits into from
Aug 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
99be0c6
Add missing premium data points for organizations
skwowet Aug 22, 2023
ca61e0d
Add migrations for the newly added org fields
skwowet Aug 23, 2023
152f7b5
drop parentUrl and add immediateParent column also modify find/create…
skwowet Aug 23, 2023
feedeef
Remove parentUrl instances in the required areas
skwowet Aug 23, 2023
c0c08ec
Fix failing tests
skwowet Aug 24, 2023
5dbab0f
Format code and run prettier
skwowet Aug 24, 2023
7d65178
Fix organization failing test
skwowet Aug 24, 2023
8a2e6fa
Add premium data points to opensearch
skwowet Aug 24, 2023
b5ad314
Merge branch 'main' into improvement/org-premium-data-endpoints
skwowet Aug 24, 2023
f1819dd
Support premium data points in frontend
joanagmaia Aug 25, 2023
a8d9d0e
Remove duplicated key
joanagmaia Aug 25, 2023
6ef03e4
drop inferredRevenue instead refactor revenueRange for the same info
skwowet Aug 26, 2023
ed08d71
Merge branch 'improvement/org-premium-data-endpoints' of github.com:C…
skwowet Aug 26, 2023
2717a3c
Format and run prettier
skwowet Aug 26, 2023
febd13e
drop parentUrl in organizationCache and handle edge cases in inferred…
skwowet Aug 27, 2023
54f75f0
Update attributes and some fixes
joanagmaia Aug 28, 2023
8091d2f
Update attributes values
joanagmaia Aug 28, 2023
31bdf09
Add employee_count
joanagmaia Aug 28, 2023
7262949
Include tags and location, drop some data points
skwowet Aug 28, 2023
0313448
Merge branch 'improvement/org-premium-data-endpoints' of github.com:C…
skwowet Aug 28, 2023
0bc3a15
Refactor display of json fields
joanagmaia Aug 28, 2023
63f0199
Add new premium data points to hubspot
skwowet Aug 28, 2023
d7bddd7
Merge branch 'main' into improvement/org-premium-data-endpoints
skwowet Aug 29, 2023
8c99745
Run prettier and fix lint errors
skwowet Aug 29, 2023
9d72e49
Fix validation and typo
joanagmaia Aug 29, 2023
b563b2f
Refactor validation
joanagmaia Aug 29, 2023
019e790
Synchronous script
Aug 29, 2023
e161b4a
Limit to 3
Aug 29, 2023
0a857cb
add script to package.json
skwowet Aug 29, 2023
b8d9a64
Org enrichment hotfixes
skwowet Aug 29, 2023
112cd96
Format and change avgEmployeeTenure field type
skwowet Aug 29, 2023
e41d16d
Fix display of info
joanagmaia Aug 29, 2023
0462a19
Merge branch 'main' into improvement/org-premium-data-endpoints
skwowet Aug 29, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion backend/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@
"script:process-webhook": "SERVICE=script TS_NODE_TRANSPILE_ONLY=true node -r tsconfig-paths/register -r ts-node/register src/bin/scripts/process-webhook.ts",
"script:send-weekly-analytics-email": "SERVICE=script TS_NODE_TRANSPILE_ONLY=true node -r tsconfig-paths/register -r ts-node/register src/bin/scripts/send-weekly-analytics-email.ts",
"script:unleash-init": "SERVICE=script TS_NODE_TRANSPILE_ONLY=true node -r tsconfig-paths/register -r ts-node/register src/bin/scripts/unleash-init.ts",
"script:enrich-members-organizations": "SERVICE=script TS_NODE_TRANSPILE_ONLY=true node -r tsconfig-paths/register -r ts-node/register src/bin/scripts/enrich-members-and-organizations.ts"
"script:enrich-members-organizations": "SERVICE=script TS_NODE_TRANSPILE_ONLY=true node -r tsconfig-paths/register -r ts-node/register src/bin/scripts/enrich-members-and-organizations.ts",
"script:enrich-organizations": "SERVICE=script TS_NODE_TRANSPILE_ONLY=true node -r tsconfig-paths/register -r ts-node/register src/bin/scripts/enrichOrganizationsSynchronous.ts"
},
"dependencies": {
"@aws-sdk/client-comprehend": "^3.159.0",
Expand Down
3 changes: 0 additions & 3 deletions backend/src/api/components/member/examples.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,6 @@ components:
name: Pied Piper
url: https://piedpiper.io
description: The new internet
parentUrl: null
emails:
- richard@piedpiper.io
- hello@piedpiper.io
Expand Down Expand Up @@ -337,7 +336,6 @@ components:
name: Pied Piper
url: https://piedpiper.io
description: The new internet
parentUrl: null
emails:
- richard@piedpiper.io
- hello@piedpiper.io
Expand Down Expand Up @@ -451,7 +449,6 @@ components:
name: Pied Piper
url: https://piedpiper.io
description: The new internet
parentUrl: null
emails:
- richard@piedpiper.io
- hello@piedpiper.io
Expand Down
3 changes: 0 additions & 3 deletions backend/src/api/components/organization/examples.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ components:
name: Pied Piper
url: https://piedpiper.io
description: The new internet
parentUrl: null
emails:
- richard@piedpiper.io
- hello@piedpiper.io
Expand Down Expand Up @@ -46,7 +45,6 @@ components:
name: Pied Piper
url: https://piedpiper.io
description: The new internet
parentUrl: null
emails:
- richard@piedpiper.io
- hello@piedpiper.io
Expand Down Expand Up @@ -93,7 +91,6 @@ components:
name: Hooli
url: https://hooli.xyz
description: Hooli is an international corporation founded by Gavin Belson and Peter Gregory
parentUrl: null
emails:
- gavin@hooli.xyz
phoneNumbers: null
Expand Down
60 changes: 60 additions & 0 deletions backend/src/bin/scripts/enrichOrganizationsSynchronous.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
import commandLineArgs from 'command-line-args'
import commandLineUsage from 'command-line-usage'
import * as fs from 'fs'
import path from 'path'
import { getServiceLogger } from '@crowd/logging'
import { BulkorganizationEnrichmentWorker } from '@/serverless/microservices/nodejs/bulk-enrichment/bulkOrganizationEnrichmentWorker'

/* eslint-disable no-console */

// Banner text printed at the top of the usage guide; read once at start-up
// from the banner.txt file that sits next to this script.
const banner = fs.readFileSync(path.join(__dirname, 'banner.txt'), 'utf8')

const log = getServiceLogger()

// Command-line options understood by this script.
const options = [
  {
    name: 'tenant',
    alias: 't',
    type: String,
    description:
      'The unique ID of the tenant that you would like to enrich. Several IDs can be passed as a comma-separated list.',
  },
  {
    name: 'help',
    alias: 'h',
    type: Boolean,
    description: 'Print this usage guide.',
  },
]

// Sections rendered by command-line-usage: banner, short description, option list.
const sections = [
  {
    content: banner,
    raw: true,
  },
  {
    header: 'Enrich organizations of the tenant',
    content: 'Synchronously enrich the organizations of one or more tenants',
  },
  {
    header: 'Options',
    optionList: options,
  },
]

const usage = commandLineUsage(sections)
const parameters = commandLineArgs(options)

// `--tenant` accepts a comma-separated list. Surrounding whitespace and empty
// entries (e.g. a trailing comma) are dropped so the worker is never called
// with a blank tenant ID. A missing or value-less `--tenant` yields an empty list.
const rawTenant: string = typeof parameters.tenant === 'string' ? parameters.tenant : ''
const tenantIds: string[] = rawTenant
  .split(',')
  .map((id: string) => id.trim())
  .filter((id: string) => id.length > 0)

if (parameters.help || tenantIds.length === 0) {
  // Nothing to do without at least one tenant ID: show the usage guide instead.
  console.log(usage)
} else {
  setImmediate(async () => {
    // Second argument handed to the enrichment worker.
    // NOTE(review): presumably caps how many organizations are processed per run/batch —
    // confirm against BulkorganizationEnrichmentWorker.
    const limit = 3

    try {
      // Tenants are processed strictly one after another on purpose: each
      // worker call is awaited before the next tenant starts.
      for (const tenantId of tenantIds) {
        // NOTE(review): the meaning of the trailing `true` flag is defined by the worker — confirm.
        await BulkorganizationEnrichmentWorker(tenantId, limit, true)
        log.info(`Done for tenant ${tenantId}`)
      }

      process.exit(0)
    } catch (err) {
      // Surface the failure and exit non-zero instead of leaving an unhandled
      // promise rejection behind.
      log.error(err, 'Organization enrichment failed')
      process.exit(1)
    }
  })
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
-- Removes the premium data-point columns from "organizations" and puts the
-- "parentUrl" column back on both "organizations" and "organizationCaches".
-- NOTE(review): this reads as the rollback counterpart of the migration that
-- adds these columns and drops "parentUrl" — confirm against the migration file names.

-- Drop the premium data-point columns (any values stored in them are discarded).
ALTER TABLE public."organizations" DROP COLUMN "affiliatedProfiles";
ALTER TABLE public."organizations" DROP COLUMN "allSubsidiaries";
ALTER TABLE public."organizations" DROP COLUMN "alternativeDomains";
ALTER TABLE public."organizations" DROP COLUMN "alternativeNames";
ALTER TABLE public."organizations" DROP COLUMN "averageEmployeeTenure";
ALTER TABLE public."organizations" DROP COLUMN "averageTenureByLevel";
ALTER TABLE public."organizations" DROP COLUMN "averageTenureByRole";
ALTER TABLE public."organizations" DROP COLUMN "directSubsidiaries";
ALTER TABLE public."organizations" DROP COLUMN "employeeChurnRate";
ALTER TABLE public."organizations" DROP COLUMN "employeeCountByMonth";
ALTER TABLE public."organizations" DROP COLUMN "employeeGrowthRate";
ALTER TABLE public."organizations" DROP COLUMN "employeeCountByMonthByLevel";
ALTER TABLE public."organizations" DROP COLUMN "employeeCountByMonthByRole";
ALTER TABLE public."organizations" DROP COLUMN "gicsSector";
ALTER TABLE public."organizations" DROP COLUMN "grossAdditionsByMonth";
ALTER TABLE public."organizations" DROP COLUMN "grossDeparturesByMonth";
ALTER TABLE public."organizations" DROP COLUMN "ultimateParent";
ALTER TABLE public."organizations" DROP COLUMN "immediateParent";
-- Re-create "parentUrl" as a nullable TEXT column on both tables. No backfill
-- is performed here, so the column starts out NULL for every existing row.
ALTER TABLE public."organizations" ADD COLUMN "parentUrl" TEXT NULL;
ALTER TABLE public."organizationCaches" ADD COLUMN "parentUrl" TEXT NULL;
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
-- Adds the premium data-point columns to "organizations" and removes the
-- "parentUrl" column from both "organizations" and "organizationCaches".
-- No NOT NULL constraint or default is declared on any new column, so each
-- one starts out NULL for existing rows.

-- New columns: list-like values are TEXT[], structured values are JSONB,
-- "averageEmployeeTenure" is a single FLOAT, the remaining ones are plain TEXT.
ALTER TABLE public."organizations" ADD COLUMN "affiliatedProfiles" TEXT[];
ALTER TABLE public."organizations" ADD COLUMN "allSubsidiaries" TEXT[];
ALTER TABLE public."organizations" ADD COLUMN "alternativeDomains" TEXT[];
ALTER TABLE public."organizations" ADD COLUMN "alternativeNames" TEXT[];
ALTER TABLE public."organizations" ADD COLUMN "averageEmployeeTenure" FLOAT NULL;
ALTER TABLE public."organizations" ADD COLUMN "averageTenureByLevel" JSONB NULL;
ALTER TABLE public."organizations" ADD COLUMN "averageTenureByRole" JSONB NULL;
ALTER TABLE public."organizations" ADD COLUMN "directSubsidiaries" TEXT[];
ALTER TABLE public."organizations" ADD COLUMN "employeeChurnRate" JSONB NULL;
ALTER TABLE public."organizations" ADD COLUMN "employeeCountByMonth" JSONB NULL;
ALTER TABLE public."organizations" ADD COLUMN "employeeGrowthRate" JSONB NULL;
ALTER TABLE public."organizations" ADD COLUMN "employeeCountByMonthByLevel" JSONB NULL;
ALTER TABLE public."organizations" ADD COLUMN "employeeCountByMonthByRole" JSONB NULL;
ALTER TABLE public."organizations" ADD COLUMN "gicsSector" TEXT NULL;
ALTER TABLE public."organizations" ADD COLUMN "grossAdditionsByMonth" JSONB NULL;
ALTER TABLE public."organizations" ADD COLUMN "grossDeparturesByMonth" JSONB NULL;
-- "ultimateParent" and "immediateParent" are added in the same migration that
-- drops "parentUrl" below.
-- NOTE(review): no data is copied from "parentUrl" into them here — confirm that is intended.
ALTER TABLE public."organizations" ADD COLUMN "ultimateParent" TEXT NULL;
ALTER TABLE public."organizations" ADD COLUMN "immediateParent" TEXT NULL;
-- Dropping "parentUrl" discards whatever values were stored in it on both tables.
ALTER TABLE public."organizations" DROP COLUMN "parentUrl";
ALTER TABLE public."organizationCaches" DROP COLUMN "parentUrl";
74 changes: 72 additions & 2 deletions backend/src/database/models/organization.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,11 @@ export default (sequelize) => {
type: DataTypes.TEXT,
allowNull: true,
},
parentUrl: {
immediateParent: {
type: DataTypes.TEXT,
allowNull: true,
},
ultimateParent: {
type: DataTypes.TEXT,
allowNull: true,
},
Expand Down Expand Up @@ -79,10 +83,12 @@ export default (sequelize) => {
employees: {
type: DataTypes.INTEGER,
allowNull: true,
comment: 'total employee count of the company',
},
revenueRange: {
type: DataTypes.JSONB,
allowNull: true,
comment: 'inferred revenue range of the company',
},
importHash: {
type: DataTypes.STRING(255),
Expand Down Expand Up @@ -131,7 +137,7 @@ export default (sequelize) => {
type: {
type: DataTypes.TEXT,
allowNull: true,
comment: "The comnapny's type. For example NGO",
comment: "The company's type. For example NGO",
},
employeeCountByCountry: {
type: DataTypes.JSONB,
Expand Down Expand Up @@ -159,6 +165,70 @@ export default (sequelize) => {
type: DataTypes.JSONB,
defaultValue: {},
},
affiliatedProfiles: {
type: DataTypes.ARRAY(DataTypes.TEXT),
allowNull: true,
},
allSubsidiaries: {
type: DataTypes.ARRAY(DataTypes.TEXT),
allowNull: true,
},
alternativeDomains: {
type: DataTypes.ARRAY(DataTypes.TEXT),
allowNull: true,
},
alternativeNames: {
type: DataTypes.ARRAY(DataTypes.TEXT),
allowNull: true,
},
averageEmployeeTenure: {
type: DataTypes.FLOAT,
allowNull: true,
},
averageTenureByLevel: {
type: DataTypes.JSONB,
allowNull: true,
},
averageTenureByRole: {
type: DataTypes.JSONB,
allowNull: true,
},
directSubsidiaries: {
type: DataTypes.ARRAY(DataTypes.TEXT),
allowNull: true,
},
employeeChurnRate: {
type: DataTypes.JSONB,
allowNull: true,
},
employeeCountByMonth: {
type: DataTypes.JSONB,
allowNull: true,
},
employeeGrowthRate: {
type: DataTypes.JSONB,
allowNull: true,
},
employeeCountByMonthByLevel: {
type: DataTypes.JSONB,
allowNull: true,
},
employeeCountByMonthByRole: {
type: DataTypes.JSONB,
allowNull: true,
},
gicsSector: {
type: DataTypes.TEXT,
allowNull: true,
},
grossAdditionsByMonth: {
type: DataTypes.JSONB,
allowNull: true,
},
grossDeparturesByMonth: {
type: DataTypes.JSONB,
allowNull: true,
},
},
{
indexes: [
Expand Down
4 changes: 0 additions & 4 deletions backend/src/database/models/organizationCache.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,6 @@ export default (sequelize) => {
type: DataTypes.TEXT,
allowNull: true,
},
parentUrl: {
type: DataTypes.TEXT,
allowNull: true,
},
emails: {
type: DataTypes.ARRAY(DataTypes.TEXT),
allowNull: true,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ const toCreate = {
phoneNumbers: ['+42 424242424'],
logo: 'https://logo.clearbit.com/crowd.dev',
tags: ['community', 'growth', 'developer-first'],
parentUrl: null,
website: 'https://crowd.dev',
location: 'Berlin',
github: {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ const toCreate = {
phoneNumbers: ['+42 424242424'],
logo: 'https://logo.clearbit.com/crowd.dev',
tags: ['community', 'growth', 'developer-first'],
parentUrl: null,
twitter: {
handle: 'CrowdDotDev',
id: '1362101830923259908',
Expand Down Expand Up @@ -53,6 +52,24 @@ const toCreate = {
address: null,
profiles: null,
manuallyCreated: false,
affiliatedProfiles: null,
allSubsidiaries: null,
alternativeDomains: null,
alternativeNames: null,
averageEmployeeTenure: null,
averageTenureByLevel: null,
averageTenureByRole: null,
directSubsidiaries: null,
employeeChurnRate: null,
employeeCountByMonth: null,
employeeGrowthRate: null,
employeeCountByMonthByLevel: null,
employeeCountByMonthByRole: null,
gicsSector: null,
grossAdditionsByMonth: null,
grossDeparturesByMonth: null,
ultimateParent: null,
immediateParent: null,
}

async function createMembers(options) {
Expand Down Expand Up @@ -558,7 +575,6 @@ describe('OrganizationRepository tests', () => {
phoneNumbers: ['+42 424242424'],
logo: 'https://logo.clearbit.com/crowd.dev',
tags: ['community', 'growth', 'developer-first'],
parentUrl: null,
twitter: {
handle: 'CrowdDotDev',
id: '1362101830923259908',
Expand Down Expand Up @@ -590,7 +606,6 @@ describe('OrganizationRepository tests', () => {
phoneNumbers: ['+42 54545454'],
logo: 'https://logo.clearbit.com/piedpiper',
tags: ['new-internet', 'compression'],
parentUrl: null,
twitter: {
handle: 'piedPiper',
id: '1362101830923259908',
Expand Down Expand Up @@ -622,7 +637,6 @@ describe('OrganizationRepository tests', () => {
phoneNumbers: ['+42 12121212'],
logo: 'https://logo.clearbit.com/hooli',
tags: ['not-google', 'elephant'],
parentUrl: null,
twitter: {
handle: 'hooli',
id: '1362101830923259908',
Expand Down
32 changes: 29 additions & 3 deletions backend/src/database/repositories/organizationCacheRepository.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ class OrganizationCacheRepository {
'name',
'url',
'description',
'parentUrl',
'emails',
'phoneNumbers',
'logo',
Expand Down Expand Up @@ -73,7 +72,6 @@ class OrganizationCacheRepository {
'name',
'url',
'description',
'parentUrl',
'emails',
'phoneNumbers',
'logo',
Expand Down Expand Up @@ -119,7 +117,35 @@ class OrganizationCacheRepository {
return this.findById(record.id, options)
}

static async bulkUpdate(data: any[], options: IRepositoryOptions): Promise<void> {
static async bulkUpdate(
data: any[],
options: IRepositoryOptions,
isEnrichment: boolean = false,
): Promise<void> {
const transaction = SequelizeRepository.getTransaction(options)

if (isEnrichment) {
// Fetch existing organizations
const existingRecords = await options.database.organizationCache.findAll({
where: {
id: {
[options.database.Sequelize.Op.in]: data.map((x) => x.id),
},
},
transaction,
})

// Merge existing tags with new tags instead of overwriting
data = data.map((org) => {
const existingOrg = existingRecords.find((record) => record.id === org.id)
if (existingOrg && existingOrg.tags) {
// Merge existing and new tags without duplicates
org.tags = lodash.uniq([...org.tags, ...existingOrg.tags])
}
return org
})
}

for (const org of data) {
this.update(org.id, org, options)
}
Expand Down
Loading