From 9c0bfde945d27f642635492a51b01d03f1c74e00 Mon Sep 17 00:00:00 2001 From: klemen1999 Date: Mon, 24 Feb 2025 15:08:39 +0100 Subject: [PATCH 1/4] Added semgrep and some fixes --- .github/workflows/semgrep.yaml | 65 +++++++++++++++++++ Dockerfile | 12 ++++ .../dataset_annotation/owlv2_annotator.py | 4 +- 3 files changed, 79 insertions(+), 2 deletions(-) create mode 100644 .github/workflows/semgrep.yaml diff --git a/.github/workflows/semgrep.yaml b/.github/workflows/semgrep.yaml new file mode 100644 index 0000000..c0e755b --- /dev/null +++ b/.github/workflows/semgrep.yaml @@ -0,0 +1,65 @@ +name: Semgrep SAST Scan + +on: + pull_request: + +jobs: + semgrep: + # User definable name of this GitHub Actions job. + name: semgrep/ci + # If you are self-hosting, change the following `runs-on` value: + runs-on: ubuntu-latest + container: + # A Docker image with Semgrep installed. Do not change this. + image: returntocorp/semgrep + # Skip any PR created by dependabot to avoid permission issues: + if: (github.actor != 'dependabot[bot]') + permissions: + # required for all workflows + security-events: write + # only required for workflows in private repositories + actions: read + contents: read + + steps: + # Fetch project source with GitHub Actions Checkout. + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Perform Semgrep Analysis + # @NOTE: This is the actual semgrep command to scan your code. + # Modify the --config option to 'r/all' to scan using all rules, + # or use multiple flags to specify particular rules, such as + # --config r/all --config custom/rules + run: semgrep scan -q --sarif --config auto --config "p/secrets" . > semgrep-results.sarif + + - name: Pretty-Print SARIF Output + run: | + jq . semgrep-results.sarif > formatted-semgrep-results.sarif || echo "{}" + echo "Formatted SARIF Output (First 20 lines):" + head -n 20 formatted-semgrep-results.sarif || echo "{}" + + - name: Validate JSON Output + run: | + if ! jq empty formatted-semgrep-results.sarif > /dev/null 2>&1; then + echo "⚠️ Semgrep output is not valid JSON. Skipping annotations." + exit 0 + fi + + - name: Add PR Annotations for Semgrep Findings + run: | + total_issues=$(jq '.runs[0].results | length' formatted-semgrep-results.sarif) + if [[ "$total_issues" -eq 0 ]]; then + echo "✅ No Semgrep issues found!" + exit 0 + fi + + jq -c '.runs[0].results[]' formatted-semgrep-results.sarif | while IFS= read -r issue; do + file=$(echo "$issue" | jq -r '.locations[0].physicalLocation.artifactLocation.uri') + line=$(echo "$issue" | jq -r '.locations[0].physicalLocation.region.startLine') + message=$(echo "$issue" | jq -r '.message.text') + + if [[ -n "$file" && -n "$line" && -n "$message" ]]; then + echo "::error file=$file,line=$line,title=Semgrep Issue::${message}" + fi + done \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 0365f4c..604eadc 100644 --- a/Dockerfile +++ b/Dockerfile @@ -14,7 +14,19 @@ ARG BRANCH=main ## Clone the repository with the specified branch RUN git clone --branch ${BRANCH} https://github.com/luxonis/datadreamer.git +## Create a non-root user and switch to that user +RUN adduser --disabled-password --gecos "" non-root && \ + chown -R non-root:non-root /app + +## Switch to the non-root user +USER non-root + +## Install the Python package as the non-root user RUN cd datadreamer && pip install . +## Set PATH for the installed executable +ENV PATH="/home/non-root/.local/bin:/usr/local/bin:$PATH" + + ## define image execution ENTRYPOINT ["datadreamer"] diff --git a/datadreamer/dataset_annotation/owlv2_annotator.py b/datadreamer/dataset_annotation/owlv2_annotator.py index 7c150e8..bbe089d 100644 --- a/datadreamer/dataset_annotation/owlv2_annotator.py +++ b/datadreamer/dataset_annotation/owlv2_annotator.py @@ -410,9 +410,9 @@ def release(self, empty_cuda_cache: bool = False) -> None: # Image-driven annotation url = "http://images.cocodataset.org/val2017/000000039769.jpg" - im = Image.open(requests.get(url, stream=True).raw) + im = Image.open(requests.get(url, stream=True).raw) # nosemgrep query_url = "http://images.cocodataset.org/val2017/000000058111.jpg" - query_image = Image.open(requests.get(query_url, stream=True).raw) + query_image = Image.open(requests.get(query_url, stream=True).raw) # nosemgrep final_boxes, final_scores, final_labels = annotator.annotate_batch( [im], [query_image], conf_threshold=0.9 From 401e61218e17c5efd2adab77f16e1c40fb04fc7c Mon Sep 17 00:00:00 2001 From: klemen1999 Date: Mon, 24 Feb 2025 15:14:32 +0100 Subject: [PATCH 2/4] extra space --- Dockerfile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 604eadc..de9b687 100644 --- a/Dockerfile +++ b/Dockerfile @@ -27,6 +27,5 @@ RUN cd datadreamer && pip install . ## Set PATH for the installed executable ENV PATH="/home/non-root/.local/bin:/usr/local/bin:$PATH" - -## define image execution +## Define image execution ENTRYPOINT ["datadreamer"] From 07dc932cf1dea37cc71ac62b25c4fcac7ab34f5a Mon Sep 17 00:00:00 2001 From: klemen1999 Date: Mon, 24 Feb 2025 15:30:08 +0100 Subject: [PATCH 3/4] pre-commit fix --- datadreamer/dataset_annotation/owlv2_annotator.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datadreamer/dataset_annotation/owlv2_annotator.py b/datadreamer/dataset_annotation/owlv2_annotator.py index bbe089d..3498f11 100644 --- a/datadreamer/dataset_annotation/owlv2_annotator.py +++ b/datadreamer/dataset_annotation/owlv2_annotator.py @@ -410,9 +410,9 @@ def release(self, empty_cuda_cache: bool = False) -> None: # Image-driven annotation url = "http://images.cocodataset.org/val2017/000000039769.jpg" - im = Image.open(requests.get(url, stream=True).raw) # nosemgrep + im = Image.open(requests.get(url, stream=True).raw) # nosemgrep query_url = "http://images.cocodataset.org/val2017/000000058111.jpg" - query_image = Image.open(requests.get(query_url, stream=True).raw) # nosemgrep + query_image = Image.open(requests.get(query_url, stream=True).raw) # nosemgrep final_boxes, final_scores, final_labels = annotator.annotate_batch( [im], [query_image], conf_threshold=0.9 From 832df053b5c0392b4e6f0cb7ae4d4f727dbe743f Mon Sep 17 00:00:00 2001 From: Nikita Sokovnin Date: Tue, 25 Feb 2025 16:02:51 +0000 Subject: [PATCH 4/4] fix: sdxl-turbo cpu dtype --- datadreamer/image_generation/sdxl_turbo_image_generator.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/datadreamer/image_generation/sdxl_turbo_image_generator.py b/datadreamer/image_generation/sdxl_turbo_image_generator.py index 39e53dd..1368e62 100644 --- a/datadreamer/image_generation/sdxl_turbo_image_generator.py +++ b/datadreamer/image_generation/sdxl_turbo_image_generator.py @@ -41,8 +41,6 @@ def _init_gen_model(self) -> AutoPipelineForText2Image: if self.device == "cpu": base = AutoPipelineForText2Image.from_pretrained( "stabilityai/sdxl-turbo", - # variant="fp16", - torch_dtype=torch.float32, use_safetensors=True, ) base.to("cpu")