Skip to content

Commit 68d0a8a

Browse files
committed
#381 Move scan formatters to a new "formattedcode" module
* This is a first step towards #381.
* Code and tests have been split from the scancode module.
* Also restore the ability to run ScanCode as a library by vendoring the NOTICE file. This was a regression introduced in #424 and tracked as #468.

Signed-off-by: Philippe Ombredanne <pombredanne@nexb.com>
1 parent d322513 commit 68d0a8a

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

71 files changed

+617
-391
lines changed

src/formattedcode/__init__.py

+23
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
#
2+
# Copyright (c) 2017 nexB Inc. and others. All rights reserved.
3+
# http://nexb.com and https://github.com/nexB/scancode-toolkit/
4+
# The ScanCode software is licensed under the Apache License version 2.0.
5+
# Data generated with ScanCode require an acknowledgment.
6+
# ScanCode is a trademark of nexB Inc.
7+
#
8+
# You may not use this software except in compliance with the License.
9+
# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
10+
# Unless required by applicable law or agreed to in writing, software distributed
11+
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12+
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
13+
# specific language governing permissions and limitations under the License.
14+
#
15+
# When you publish or redistribute any data created with ScanCode or any ScanCode
16+
# derivative work, you must accompany this data with the following acknowledgment:
17+
#
18+
# Generated with ScanCode and provided on an "AS IS" BASIS, WITHOUT WARRANTIES
19+
# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
20+
# ScanCode should be considered or used as legal advice. Consult an Attorney
21+
# for any legal advice.
22+
# ScanCode is a free software code scanning tool from nexB Inc. and others.
23+
# Visit https://github.com/nexB/scancode-toolkit/ for support and download.
File renamed without changes.

src/formattedcode/saver.py

+252
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,252 @@
1+
#
2+
# Copyright (c) 2017 nexB Inc. and others. All rights reserved.
3+
# http://nexb.com and https://github.com/nexB/scancode-toolkit/
4+
# The ScanCode software is licensed under the Apache License version 2.0.
5+
# Data generated with ScanCode require an acknowledgment.
6+
# ScanCode is a trademark of nexB Inc.
7+
#
8+
# You may not use this software except in compliance with the License.
9+
# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
10+
# Unless required by applicable law or agreed to in writing, software distributed
11+
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12+
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
13+
# specific language governing permissions and limitations under the License.
14+
#
15+
# When you publish or redistribute any data created with ScanCode or any ScanCode
16+
# derivative work, you must accompany this data with the following acknowledgment:
17+
#
18+
# Generated with ScanCode and provided on an "AS IS" BASIS, WITHOUT WARRANTIES
19+
# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
20+
# ScanCode should be considered or used as legal advice. Consult an Attorney
21+
# for any legal advice.
22+
# ScanCode is a free software code scanning tool from nexB Inc. and others.
23+
# Visit https://github.com/nexB/scancode-toolkit/ for support and download.
24+
25+
from __future__ import absolute_import
26+
from __future__ import print_function
27+
from __future__ import division
28+
from __future__ import unicode_literals
29+
30+
from collections import OrderedDict
31+
from functools import partial
32+
from multiprocessing import Pool
33+
import os
34+
from os.path import expanduser
35+
from os.path import abspath
36+
import sys
37+
from time import time
38+
import traceback
39+
from types import GeneratorType
40+
41+
import click
42+
from click.termui import style
43+
44+
from commoncode import filetype
45+
from commoncode import fileutils
46+
from commoncode import ignore
47+
48+
from scancode import __version__ as version
49+
50+
from scancode.api import get_copyrights
51+
from scancode.api import get_emails
52+
from scancode.api import get_file_infos
53+
from scancode.api import get_licenses
54+
from scancode.api import get_package_infos
55+
from scancode.api import get_urls
56+
from scancode.api import _empty_file_infos
57+
58+
from scancode.cache import ScanFileCache
59+
from scancode.cache import get_scans_cache_class
60+
61+
from formattedcode.format import as_template
62+
from formattedcode.format import as_html_app
63+
from formattedcode.format import create_html_app_assets
64+
from formattedcode.format import HtmlAppAssetCopyWarning
65+
from formattedcode.format import HtmlAppAssetCopyError
66+
67+
68+
def save_formatted_output(scanners, files_count, version, notice, scanned_files, format, input, output_file, _echo):
    """
    Save scan results to file or screen.

    Dispatch on `format` and write `scanned_files` to `output_file`:

    - 'html': write each chunk produced by `as_template`.
    - 'html-app': write the `as_html_app` page, then create its assets.
    - 'json' / 'json-pp': write a compact or 2-space indented JSON document
      holding `notice`, `version`, `files_count` and the scanned files.
    - 'spdx-tv' / 'spdx-rdf': write an SPDX document built from the scanned
      files in tag/value or RDF form.

    Arguments:
    - `scanners`: accepted for interface compatibility; not used here.
    - `files_count`, `version`, `notice`: stored in the JSON output;
      `version` is also used to name the SPDX creator tool.
    - `scanned_files`: iterable of per-file scan results. The SPDX writer
      reads each item with `.get()`.
    - `input`: the scanned location, used by 'html-app' and the SPDX formats.
    - `output_file`: object with a `write()` method.
    - `_echo`: callable invoked as `_echo(message, fg=color)` to report
      problems.

    Raise an Exception('Unknown format') for any other `format` value.
    """
    if format == 'html':
        _write_html(scanned_files, output_file, _echo)

    elif format == 'html-app':
        _write_html_app(scanned_files, input, output_file, _echo)

    elif format in ('json', 'json-pp'):
        _write_json(files_count, version, notice, scanned_files, output_file,
                    pretty=(format == 'json-pp'))

    elif format in ('spdx-tv', 'spdx-rdf'):
        _write_spdx(version, scanned_files, format, input, output_file)

    else:
        raise Exception('Unknown format')


def _write_html(scanned_files, output_file, _echo):
    """
    Write the `as_template` rendering of `scanned_files` to `output_file`,
    one chunk at a time.
    """
    for template_chunk in as_template(scanned_files):
        try:
            output_file.write(template_chunk)
        except Exception as e:
            extra_context = 'ERROR: Failed to write output to HTML for: ' + repr(template_chunk)
            _echo(extra_context, fg='red')
            e.args += (extra_context,)
            # A bare raise re-raises the active exception with its original
            # traceback; "raise e" would restart the traceback here on
            # Python 2.
            raise


def _write_html_app(scanned_files, scan_location, output_file, _echo):
    """
    Write the HTML app page to `output_file` and create the app assets.
    Asset creation problems are reported through `_echo` and not raised.
    """
    output_file.write(as_html_app(scan_location, output_file))
    try:
        create_html_app_assets(scanned_files, output_file)
    except HtmlAppAssetCopyWarning:
        _echo('\nHTML app creation skipped when printing to stdout.', fg='yellow')
    except HtmlAppAssetCopyError:
        _echo('\nFailed to create HTML app.', fg='red')


def _write_json(files_count, version, notice, scanned_files, output_file, pretty):
    """
    Write the scan as a JSON document followed by a newline to `output_file`.
    Use a two-space indent if `pretty` is true, compact separators otherwise.
    """
    import simplejson as json

    meta = OrderedDict()
    meta['scancode_notice'] = notice
    meta['scancode_version'] = version
    meta['files_count'] = files_count
    # TODO: add scanning options to meta
    meta['files'] = scanned_files

    # Only the layout differs between the two JSON flavors.
    if pretty:
        layout = dict(indent=2 * ' ')
    else:
        layout = dict(separators=(',', ':'))

    # NOTE: "unicode" is a Python 2 builtin.
    output_file.write(unicode(json.dumps(meta, iterable_as_array=True, encoding='utf-8', **layout)))
    output_file.write('\n')


def _write_spdx(version, scanned_files, spdx_format, scan_location, output_file):
    """
    Write `scanned_files` to `output_file` as an SPDX document: tag/value if
    `spdx_format` is 'spdx-tv', RDF otherwise.

    Each scanned file is read through these keys: 'path', 'sha1', 'licenses'
    (entries with 'spdx_license_key', 'key', 'short_name') and 'copyrights'
    (entries with 'statements'). A missing (None) 'licenses' or 'copyrights'
    value is reported as "no assertion", an empty one as "none".

    If no file ends up in the package, write a single comment line and
    return without producing a document.
    """
    from spdx.checksum import Algorithm
    from spdx.creationinfo import Tool
    from spdx.document import Document, License
    from spdx.file import File
    from spdx.package import Package
    from spdx.utils import NoAssert
    from spdx.utils import SPDXNone
    from spdx.version import Version

    scan_location = abspath(scan_location)

    if os.path.isdir(scan_location):
        input_path = scan_location
    else:
        input_path = os.path.dirname(scan_location)

    doc = Document(Version(2, 1), License.from_identifier('CC0-1.0'))

    doc.creation_info.add_creator(Tool('ScanCode ' + version))
    doc.creation_info.set_created_now()

    doc.package = Package(os.path.basename(input_path), NoAssert())

    # Use a set of unique copyrights for the package.
    doc.package.cr_text = set()

    all_files_have_no_license = True
    all_files_have_no_copyright = True

    for file_data in scanned_files:
        # Construct the absolute path in case we need to access the file
        # to calculate its SHA1.
        file_entry = File(os.path.join(input_path, file_data.get('path')))

        file_sha1 = file_data.get('sha1')
        if not file_sha1:
            if os.path.isfile(file_entry.name):
                # Calculate the SHA1 in case it is missing, e.g. for empty files.
                file_sha1 = file_entry.calc_chksum()
            else:
                # Skip directories.
                continue

        # Restore the relative file name as that is what we want in
        # SPDX output (with explicit leading './').
        file_entry.name = './' + file_data.get('path')
        file_entry.chk_sum = Algorithm('SHA1', file_sha1)

        file_licenses = file_data.get('licenses')
        if file_licenses:
            all_files_have_no_license = False
            for file_license in file_licenses:
                spdx_id = file_license.get('spdx_license_key')
                if spdx_id:
                    spdx_license = License.from_identifier(spdx_id)
                else:
                    license_key = 'LicenseRef-' + file_license.get('key')
                    spdx_license = License(file_license.get('short_name'), license_key)

                # Add licenses in the order they appear in the file. Maintaining the order
                # might be useful for provenance purposes.
                file_entry.add_lics(spdx_license)
                doc.package.add_lics_from_file(spdx_license)
        else:
            if file_licenses is None:
                # No license data at all for this file: make no assertion.
                all_files_have_no_license = False
                spdx_license = NoAssert()
            else:
                spdx_license = SPDXNone()

            file_entry.add_lics(spdx_license)

        file_entry.conc_lics = NoAssert()

        file_copyrights = file_data.get('copyrights')
        if file_copyrights:
            all_files_have_no_copyright = False
            statements = []
            for file_copyright in file_copyrights:
                # Tolerate an entry without statements instead of failing
                # on extend(None).
                statements.extend(file_copyright.get('statements') or [])

            doc.package.cr_text.update(statements)

            # Create a text of copyright statements in the order they appear in the file.
            # Maintaining the order might be useful for provenance purposes.
            file_entry.copyright = '\n'.join(statements) + '\n'
        else:
            if file_copyrights is None:
                # No copyright data at all for this file: make no assertion.
                all_files_have_no_copyright = False
                file_entry.copyright = NoAssert()
            else:
                file_entry.copyright = SPDXNone()

        doc.package.add_file(file_entry)

    if not doc.package.files:
        if spdx_format == 'spdx-tv':
            output_file.write("# No results for package '{}'.\n".format(doc.package.name))
        else:
            output_file.write("<!-- No results for package '{}'. -->\n".format(doc.package.name))
        return

    if not doc.package.licenses_from_files:
        if all_files_have_no_license:
            doc.package.licenses_from_files = [SPDXNone()]
        else:
            doc.package.licenses_from_files = [NoAssert()]
    else:
        # Remove duplicate licenses from the list for the package and
        # list license identifiers alphabetically.
        unique_licenses = set(doc.package.licenses_from_files)
        doc.package.licenses_from_files = sorted(unique_licenses, key=lambda x: x.identifier)

    if not doc.package.cr_text:
        if all_files_have_no_copyright:
            doc.package.cr_text = SPDXNone()
        else:
            doc.package.cr_text = NoAssert()
    else:
        # Create a text of alphabetically sorted copyright statements for the package.
        doc.package.cr_text = '\n'.join(sorted(doc.package.cr_text)) + '\n'

    doc.package.verif_code = doc.package.calc_verif_code()
    doc.package.license_declared = NoAssert()
    doc.package.conc_lics = NoAssert()

    # As the spdx-tools package can only write the document to a "str" file but ScanCode provides a "unicode" file,
    # write to a "str" buffer first and then manually write the value to a "unicode" file.
    # NOTE: the "StringIO" module is Python 2 only.
    from StringIO import StringIO

    str_buffer = StringIO()

    if spdx_format == 'spdx-tv':
        from spdx.writers.tagvalue import write_document
    else:
        from spdx.writers.rdf import write_document
    write_document(doc, str_buffer)

    output_file.write(str_buffer.getvalue())

src/scancode/NOTICE

+56
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
Software license
2+
================
3+
4+
Copyright (c) 2017 nexB Inc. and others. All rights reserved.
5+
http://nexb.com and https://github.com/nexB/scancode-toolkit/
6+
The ScanCode software is licensed under the Apache License version 2.0.
7+
Data generated with ScanCode require an acknowledgment.
8+
ScanCode is a trademark of nexB Inc.
9+
10+
You may not use this software except in compliance with the License.
11+
You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
12+
Unless required by applicable law or agreed to in writing, software distributed
13+
under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
14+
CONDITIONS OF ANY KIND, either express or implied. See the License for the
15+
specific language governing permissions and limitations under the License.
16+
17+
When you publish or redistribute any data created with ScanCode or any ScanCode
18+
derivative work, you must accompany this data with the following acknowledgment:
19+
20+
Generated with ScanCode and provided on an "AS IS" BASIS, WITHOUT WARRANTIES
21+
OR CONDITIONS OF ANY KIND, either express or implied. No content created from
22+
ScanCode should be considered or used as legal advice. Consult an Attorney
23+
for any legal advice.
24+
ScanCode is a free software code scanning tool from nexB Inc. and others.
25+
Visit https://github.com/nexB/scancode-toolkit/ for support and download.
26+
27+
28+
Third-party software licenses
29+
=============================
30+
31+
ScanCode embeds third-party free and open source software packages under various
32+
licenses including copyleft licenses. Some of the third-party software packages
33+
are delivered as pre-built binaries. The origin and license of these packages is
34+
documented by .ABOUT files.
35+
36+
The corresponding source code for pre-compiled third-party software is available
37+
for immediate download from the same release page where you obtained ScanCode at:
38+
https://github.com/nexB/scancode-toolkit/
39+
or https://github.com/nexB/scancode-thirdparty-src/
40+
41+
You may also contact us to request the source code by email at info@nexb.com or
42+
by postal mail at:
43+
44+
nexB Inc., ScanCode open source code request
45+
735 Industrial Road, Suite #101, 94070 San Carlos, CA, USA
46+
47+
Please indicate in your communication the ScanCode version for which you are
48+
requesting source code.
49+
50+
51+
License for ScanCode datasets
52+
=============================
53+
54+
ScanCode includes datasets (e.g. for license detection) that are dedicated
55+
to the Public Domain using the Creative Commons CC0 1.0 Universal (CC0 1.0)
56+
Public Domain Dedication: http://creativecommons.org/publicdomain/zero/1.0/

0 commit comments

Comments
 (0)