Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(libraries-release-data): add service name lookup from artifact id for gapic libraries, add apiary libraries and spring cloud gcp to release data table. #6437

Merged
merged 4 commits into from
Feb 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 44 additions & 11 deletions .kokoro/nightly/create-versions-csv.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# Output:
# The script generates cloud_java_client_library_release_dates.csv that holds the data defined below.
# It has artifact_id,service_name,version, and release_date columns.
# this csv file will be uploaded to (project) cloud-java-metrics.(dataset) client_library_versions. (table) cloud_java_client_library_release_dates
# this csv file is uploaded to (project) cloud-java-metrics.(dataset) client_library_versions. (table) cloud_java_client_library_release_dates
# using bq load command

# Fail on any error.
Expand All @@ -13,6 +13,10 @@ set -x

cd github/java-cloud-bom

# prepare list of artifact id and service name match
.kokoro/nightly/get-service-names.sh
.kokoro/nightly/get-apiary-service-names.sh

mvn -B clean install

cd libraries-release-data
Expand All @@ -30,32 +34,62 @@ sed -i '/libraries-release-data/d' unfiltered-libraries.txt
sort unfiltered-libraries.txt | uniq > libraries.txt
rm -f unfiltered-libraries.txt

service_file="artifacts_to_services.txt"

cat libraries.txt | while read line; do

group_id=${line%:*}
artifact_id=${line#*:}
new_group_id="${group_id//.//}"
service_name=${artifact_id#*-cloud-}

if [[ "${artifact_id}" == google-cloud-storage ]]; then
service_name=bigstore
# Skip any artifact whose id contains "emulator": log it and move on to the
# next entry without looking up a service name or fetching release data.
if [[ $artifact_id =~ .*emulator.* ]]; then
  # Log the id that was actually matched (the variable is artifact_id).
  echo "artifactId contains 'emulator': $artifact_id"
  continue
fi
if [[ "${artifact_id}" == google-cloud-storage-transfer ]]; then
service_name=storagetransfer
service_name=$(grep "^${artifact_id}," "$service_file" | cut -d ',' -f 2)
if [[ -n $service_name ]]; then
echo "Service Name found: $service_name"
else
echo "No matching service name found for artifactId: $artifact_id"
fi

URL=https://repo1.maven.org/maven2/$new_group_id/$artifact_id

../.kokoro/nightly/fetch-library-data.sh $URL $artifact_id $service_name
../.kokoro/nightly/fetch-library-data.sh $URL $artifact_id $service_name >> cloud_java_client_library_release_dates.csv

done

# apiary list

# Collapse duplicate "group_id,artifact_id,service_name" entries before fetching.
sort artifacts_to_services_apiary.txt | uniq > artifacts_to_services_apiary_uniq.txt

apiary_list="artifacts_to_services_apiary_uniq.txt"

# For every entry, fetch the release data of that artifact and append the
# resulting rows to the CSV.
while IFS= read -r line; do
  # Split line into values using comma as delimiter
  IFS=',' read -r -a values <<< "$line"
  group_id=${values[0]}
  artifact_id=${values[1]}
  service_name=${values[2]}
  # Turn the dotted group id into a URL path: com.google.apis -> com/google/apis
  new_group_id="${group_id//./\/}"
  URL="https://repo1.maven.org/maven2/${new_group_id}/${artifact_id}"
  # Quote every argument so an empty or unusual field cannot shift the
  # positional parameters seen by fetch-library-data.sh.
  ../.kokoro/nightly/fetch-library-data.sh "$URL" "$artifact_id" "$service_name" >> cloud_java_client_library_release_dates.csv
done < "$apiary_list"

# add spring cloud gcp, "service_name" is tool_name
../.kokoro/nightly/fetch-library-data.sh https://repo1.maven.org/maven2/com/google/cloud/spring-cloud-gcp-dependencies/ spring-cloud-gcp-dependencies spring-cloud-gcp >> cloud_java_client_library_release_dates.csv
../.kokoro/nightly/fetch-library-data.sh https://repo1.maven.org/maven2/org/springframework/cloud/spring-cloud-gcp-dependencies/ spring-cloud-gcp-dependencies spring-cloud-gcp >> cloud_java_client_library_release_dates.csv

rm -f libraries.txt
rm -f artifacts_to_services_apiary.txt
rm -f "$apiary_list"

sed 's/ \+/,/g' cloud_java_client_library_release_dates_tsv.txt > cloud_java_client_library_release_dates.csv
sed -i '1s/^/version,release_date,artifact_id,service_name\n/' cloud_java_client_library_release_dates.csv

# remove where service match not found
sed -i '/,$/d' cloud_java_client_library_release_dates.csv

echo "Inserting client_library_versions.cloud_java_client_library_release_dates. First 10 lines:"
head cloud_java_client_library_release_dates.csv
echo "===================="
Expand All @@ -64,6 +98,5 @@ bq load --skip_leading_rows=1 --project_id=cloud-java-metrics --source_format=CS
client_library_versions.cloud_java_client_library_release_dates \
cloud_java_client_library_release_dates.csv


rm -f cloud_java_client_library_release_dates_tsv.txt
rm -f cloud_java_client_library_release_dates.csv
rm -f artifacts_to_services.txt
36 changes: 19 additions & 17 deletions .kokoro/nightly/fetch-library-data.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
# input for this script will be URL, artifact_id and service_name
# example: https://repo1.maven.org/maven2/com/google/cloud/google-cloud-vision google-cloud-vision vision

# output: cloud_java_client_library_release_dates_tsv.txt which contains
# artifact_id,service_name,version, and release_date for the artifacts (without the column headers)
# output: one line per released version, written to stdout, in the format
# version,release_date,artifact_id,service_name

mavenCentralURL=$1
artifact_id=$2
Expand All @@ -15,20 +15,22 @@ wget -O mavenFile --referer --recursive -nd --no-parent \
--header="User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36" \
${1}

grep -E '<a href=".*">' mavenFile > mavenContents.txt
# NOTE(review): outputFile is assigned here but is not referenced by the pipeline below — confirm whether it is still needed.
outputFile="maven_versions_and_dates.txt"
# Turn the page saved in mavenFile into "version,release_date,artifact_id,service_name"
# rows, one per matching line, written to stdout.
# assume semantic versions, starting with number. Get lines from file that looks like
# '<a href="0.10.0-beta/" title="0.10.0-beta/">0.10.0-beta/</a> 2017-03-17 00:01 - '
# NOTE(review): in an ERE '|' has the lowest precedence, so the pattern below reads as
# (<a href="[0-9]) OR ([a-z].*"\s), not "href starting with a digit or a letter" — confirm this is intended.
grep -E '<a href=\"[0-9]|[a-z].*\"\s' mavenFile | \
# drop lines mentioning "metadata" or "meta name"
grep -v -E '(metadata)|(meta name)' | \
# remove content between '/" title=' and '</a>'
sed -e 's/\/"\stitle=.*<\/a>//' | \
# remove content before version
sed -e 's/<a href=\"//' | \
# replace each run of 3 or more whitespace characters with ';'
sed -E 's/[[:space:]]{3,}/;/g' | \
# split on space or ';' and keep the first two fields: version and date only
awk -F'[ ;]' '{print $1, $2}' | \
# insert artifact_id and service_name as the third and fourth fields
awk '{$3=a}1' a="${artifact_id}" | \
awk '{$4=b}1' b="${service_name}" | \
# join the fields with commas
sed 's/ \+/,/g'

awk '/a/ {print $2 "\t" $4}' mavenContents.txt > finalContents.txt
sed -i 1d finalContents.txt
sed -i '/maven-metadata/d' finalContents.txt
sed -i 's/href="//g' finalContents.txt
sed -i 's/"//g' finalContents.txt
sed -i 's|/||g' finalContents.txt
awk '{$3=a}1' a="${2}" finalContents.txt > newfile.txt
awk '{$4=b}1' b="${3}" newfile.txt > final.txt
cat final.txt >> cloud_java_client_library_release_dates_tsv.txt

rm -f final.txt
rm -f newfile.txt
rm -f mavenFile
rm -f mavenContents.txt
rm -f finalContents.txt
50 changes: 50 additions & 0 deletions .kokoro/nightly/get-apiary-service-names.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#!/bin/bash

# Builds the artifact-to-service mapping for the Apiary (google-api-services-*)
# libraries.
#
# This script downloads
# discovery docs: git@github.com:googleapis/discovery-artifact-manager.git
# and, from the discovery doc of each service, parses the artifact id suffix
# ("name"), the service host ("rootUrl") and the owner ("ownerDomain").
# The artifact id and group id follow the rules of the apiary repo
# git@github.com:googleapis/google-api-java-client-services.git (links below).
#
# Run this script from repo root dir
# input: N/A
# output: lines of comma separated group_id,artifact_id,service_name appended to
#         libraries-release-data/artifacts_to_services_apiary.txt

git clone https://github.com/googleapis/discovery-artifact-manager.git

cd ./discovery-artifact-manager/discoveries || exit
output_filename="../../libraries-release-data/artifacts_to_services_apiary.txt"

# install jq to extract info from JSON data
sudo apt-get update
sudo apt-get install -q -y jq

# loop through discovery json files
for file in *.json; do
  # With no matching file the glob stays literal ("*.json"); skip it.
  [[ -e "$file" ]] || continue

  # group_id logic: https://github.com/googleapis/google-api-java-client-services/blob/421c5d6ed56d5eb1257d3fc057d7d6b4fd2f9bb7/generator/src/googleapis/codegen/utilities/maven_utils.py#L50
  # artifact_id logic: https://github.com/googleapis/google-api-java-client-services/blob/421c5d6ed56d5eb1257d3fc057d7d6b4fd2f9bb7/generator/src/googleapis/codegen/utilities/maven_utils.py#L42-L47
  # default_host https://github.com/googleapis/discovery-artifact-manager/blob/9f6638a9950991d4fe67d75bdb539e6d2be20541/google-api-client-generator/src/googleapis/codegen/languages/java/default/templates/___package___/___api_className___.java.tmpl#L44

  # `jq -r '.name'` prints the literal string "null" when the field is absent,
  # which would pass the -n check below and produce "google-api-services-null".
  # `// empty` yields an empty string instead, so the check works as written.
  artifact_id_suffix=$(jq -r '.name // empty' "$file")
  # A missing rootUrl becomes "null" and is rejected by the https:// check.
  default_host=$(jq -r '.rootUrl' "$file")
  # A missing ownerDomain becomes "null" and is rejected by the google.com check.
  owner_domain=$(jq -r '.ownerDomain' "$file")

  if [[ "$default_host" =~ ^https:// ]] && [ -n "$artifact_id_suffix" ] && [ -n "$owner_domain" ]; then
    # Only services owned by google.com are recorded.
    if [[ "$owner_domain" != 'google.com' ]]; then
      echo "$owner_domain =============="
      continue
    fi
    group_id="com.google.apis"
    # Service name is the first label of the host: https://foo.googleapis.com/ -> foo
    service_name=$(echo "$default_host" | cut -d'/' -f3 | cut -d'.' -f1)
    artifact_id="google-api-services-${artifact_id_suffix}"
    echo "${group_id},${artifact_id},${service_name}" >> "$output_filename"
  else
    echo "$default_host: Not a valid URL or No 'name' field found in $file"
  fi

done

cd ../..

rm -rf discovery-artifact-manager/
65 changes: 65 additions & 0 deletions .kokoro/nightly/get-service-names.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
#!/bin/bash

# This script downloads the google-cloud-java repo, loops through the modules
# whose names start with "java-", grabs the artifactId from the pom.xml of the
# submodule whose name starts with "google-", and the service name from a
# *StubSettings.java file.
#
# Run this script from repo root dir
# input: N/A
# output: libraries-release-data/artifacts_to_services.txt with one
#         "artifact_id, service_name" line per library.

git clone https://github.com/googleapis/google-cloud-java.git

cd ./google-cloud-java || exit
# All paths below are relative to the clone root. The script never changes
# directory per module, so a module with an unexpected layout cannot leave the
# shell in the wrong directory and abort the remaining modules.
filename="./artifacts_to_services.txt"

while IFS= read -r module; do
  echo "module: ${module}"
  # Only modules starting with java- contain client library artifacts.
  if [[ ${module} != ./java-* ]]; then
    echo "not a client library, continue..."
    continue
  fi
  # special cases, add manually later.
  case "${module}" in
    ./java-dns | ./java-grafeas | ./java-notification | ./java-alloydb-connectors)
      continue
      ;;
  esac

  # Find submodule with name starting with "google-", this is to exclude proto,
  # grpc and bom folders, and locate artifact id of client library.
  mapfile -t folders < <(find "${module}" -mindepth 1 -maxdepth 1 -type d -name "google-*" ! -name "*-bom")
  echo "folder: ${folders[*]}"
  if (( ${#folders[@]} != 1 )); then
    # Zero or several candidates: the client library cannot be identified.
    echo "expected exactly one google-* folder in ${module}, found ${#folders[@]}; skipping" >&2
    continue
  fi
  folder=${folders[0]}

  # First <artifactId> of the first pom.xml found under the client folder.
  artifact_id_string=""
  pom_file=$(find "${folder}" -name 'pom.xml' -print -quit)
  if [[ -n ${pom_file} ]]; then
    artifact_id_string=$(grep -m 1 '<artifactId>' "${pom_file}" | cut -d '>' -f 2 | cut -d '<' -f 1)
  fi
  echo "artifact_id_string: ${artifact_id_string}"

  # Find *StubSettings file, get the first line containing '.googleapis.com:443'
  # Extract service name from it: the quoted host up to the first '.' and '-'.
  string=""
  stub_file=$(find "${module}" -name '*StubSettings.java' -print -quit)
  if [[ -n ${stub_file} ]]; then
    string=$(grep -m 1 '.googleapis.com:443' "${stub_file}")
  fi
  service_name=$(echo "${string}" | grep -o '".*"' | tr -d '"' | cut -d "." -f 1 | cut -d "-" -f 1)
  echo "service name: ${service_name}"
  echo "${artifact_id_string}, ${service_name}" >> "$filename"
done < <(find . -mindepth 2 -maxdepth 2 -name pom.xml | sort | xargs dirname)

# add handwritten libraries manually.
{
  echo "google-cloud-bigquery, bigquery"
  echo "google-cloud-bigtable, bigtable"
  echo "google-cloud-bigquerystorage, bigquerystorage"
  echo "google-cloud-datastore, datastore"
  echo "google-cloud-firestore, firestore"
  echo "google-cloud-logging, logging"
  echo "google-cloud-pubsub, pubsub"
  echo "google-cloud-pubsublite, pubsublite"
  echo "google-cloud-storage, bigstore"
  echo "google-cloud-storage-control, storage"
  echo "google-cloud-spanner, spanner"
  echo "google-cloud-dns, dns"
} >> "$filename"

cd ..
mv ./google-cloud-java/artifacts_to_services.txt ./libraries-release-data/artifacts_to_services.txt
# clean up
rm -rf google-cloud-java/
Loading