Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,19 @@ set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

# Emscripten (WebAssembly) builds only: extra global compile and link flags.
if(EMSCRIPTEN)
# Compile-time flags. -sMEMORY64 and -pthread are passed again at link time
# below, so both stages see the same settings.
add_compile_options("-sMEMORY64")
add_compile_options("-msimd128")
add_compile_options("-pthread")
# Link-time settings.
add_link_options("-sALLOW_MEMORY_GROWTH")
# 16GB matches the 16384 MiB that util/allocator.cc reports under
# __EMSCRIPTEN__.
add_link_options("-sMAXIMUM_MEMORY=16GB")
add_link_options("-sNODERAWFS")
add_link_options("-sMEMORY64")
add_link_options("-sSTACK_SIZE=2MB")
add_link_options("-pthread")
add_link_options("-sPROXY_TO_PTHREAD")
endif()

FetchContent_Declare(highway GIT_REPOSITORY https://github.com/google/highway.git GIT_TAG 3b680cde3a556bead9cc23c8f595d07a44d5a0d5 EXCLUDE_FROM_ALL)
FetchContent_MakeAvailable(highway)

Expand Down Expand Up @@ -60,6 +73,9 @@ set(BENCHMARK_ENABLE_GTEST_TESTS OFF)

FetchContent_Declare(benchmark GIT_REPOSITORY https://github.com/google/benchmark.git GIT_TAG v1.8.2 EXCLUDE_FROM_ALL)
FetchContent_MakeAvailable(benchmark)
# Emscripten builds only: turn off the c2y-extensions warning when compiling
# the fetched benchmark target. PRIVATE keeps the flag off targets that link
# against benchmark.
# NOTE(review): presumably the warning is raised by benchmark's own sources
# under the Emscripten toolchain's compiler; confirm it is still needed.
if(EMSCRIPTEN)
target_compile_options(benchmark PRIVATE -Wno-c2y-extensions)
endif()

# Base source files
set(SOURCES
Expand Down
2 changes: 2 additions & 0 deletions compression/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ namespace gcpp {
// yet use any AVX 10.2 features.
#define GEMMA_DISABLED_TARGETS \
(HWY_SCALAR | HWY_SSE2 | HWY_SSSE3 | HWY_SSE4 | HWY_AVX10_2)
#elif HWY_ARCH_WASM
#define GEMMA_DISABLED_TARGETS HWY_SCALAR
#endif // HWY_ARCH_*

#endif // GEMMA_DISABLED_TARGETS
Expand Down
57 changes: 44 additions & 13 deletions gemma/api_client.cc
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,15 @@ class APIClient {
use_https_(port == 443),
interactive_mode_(false) {
if (use_https_) {
#ifdef CPPHTTPLIB_OPENSSL_SUPPORT
ssl_client_ = std::make_unique<httplib::SSLClient>(host, port);
ssl_client_->set_read_timeout(60, 0);
ssl_client_->set_write_timeout(60, 0);
ssl_client_->enable_server_certificate_verification(false);
#else
std::cerr << "Error: HTTPS requested but OpenSSL not found." << std::endl;
exit(1);
#endif
} else {
client_ = std::make_unique<httplib::Client>(host, port);
client_->set_read_timeout(60, 0);
Expand Down Expand Up @@ -109,8 +114,17 @@ class APIClient {
if (!api_key_.empty()) {
headers.emplace("X-goog-api-key", api_key_);
}
auto res = use_https_ ? ssl_client_->Get("/v1beta/models", headers)
: client_->Get("/v1beta/models", headers);
httplib::Result res;
if (use_https_) {
#ifdef CPPHTTPLIB_OPENSSL_SUPPORT
res = ssl_client_->Get("/v1beta/models", headers);
#else
std::cerr << "Error: HTTPS requested but OpenSSL not found." << std::endl;
exit(1);
#endif
} else {
res = client_->Get("/v1beta/models", headers);
}

if (res && res->status == 200) {
json response = json::parse(res->body);
Expand Down Expand Up @@ -213,11 +227,17 @@ class APIClient {
if (!api_key_.empty()) {
headers.emplace("X-goog-api-key", api_key_);
}

auto res = use_https_ ? ssl_client_->Post(endpoint, headers, request.dump(),
"application/json")
: client_->Post(endpoint, headers, request.dump(),
"application/json");
httplib::Result res;
if (use_https_) {
#ifdef CPPHTTPLIB_OPENSSL_SUPPORT
res = ssl_client_->Post(endpoint, headers, request.dump(), "application/json");
#else
std::cerr << "Error: HTTPS requested but OpenSSL not found." << std::endl;
exit(1);
#endif
} else {
res = client_->Post(endpoint, headers, request.dump(), "application/json");
}

if (res && res->status == 200) {
json response = json::parse(res->body);
Expand Down Expand Up @@ -300,8 +320,17 @@ class APIClient {

httplib::Response res;
httplib::Error error;
bool success = use_https_ ? ssl_client_->send(req, res, error)
: client_->send(req, res, error);
bool success = false;
if (use_https_) {
#ifdef CPPHTTPLIB_OPENSSL_SUPPORT
success = ssl_client_->send(req, res, error);
#else
std::cerr << "Error: HTTPS requested but OpenSSL not found." << std::endl;
exit(1);
#endif
} else {
success = client_->send(req, res, error);
}

if (res.status == 200 && !accumulated_response.empty()) {
return json{
Expand All @@ -322,7 +351,9 @@ class APIClient {

private:
std::unique_ptr<httplib::Client> client_;
#ifdef CPPHTTPLIB_OPENSSL_SUPPORT
std::unique_ptr<httplib::SSLClient> ssl_client_;
#endif
std::string host_;
int port_;
std::string api_key_;
Expand Down Expand Up @@ -394,11 +425,11 @@ int main(int argc, char* argv[]) {
client_args.port = 443;
}

std::cout << BOLD << YELLOW << "🚀 Testing API Server at " << client_args.host
<< ":" << client_args.port << RESET << std::endl;
std::cout << gcpp::BOLD << gcpp::YELLOW << "🚀 Testing API Server at " << client_args.host
<< ":" << client_args.port << gcpp::RESET << std::endl;

try {
APIClient client(client_args.host, client_args.port, client_args.api_key,
gcpp::APIClient client(client_args.host, client_args.port, client_args.api_key,
client_args.model);

if (client_args.interactive) {
Expand All @@ -408,7 +439,7 @@ int main(int argc, char* argv[]) {
client.TestGenerateContent(client_args.prompt, true);
}
} catch (const std::exception& e) {
std::cerr << RED << "❌ Error: " << e.what() << RESET << std::endl;
std::cerr << gcpp::RED << "❌ Error: " << e.what() << gcpp::RESET << std::endl;
std::cerr << "Make sure the API server is running:" << std::endl;
std::cerr
<< " ./build/gemma_api_server --tokenizer <path> --weights <path>"
Expand Down
9 changes: 9 additions & 0 deletions util/allocator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,9 @@ size_t DetectPageSize() {
size_t len = sizeof(data);
HWY_ASSERT(sysctlbyname("vm.pagesize", &data, &len, nullptr, 0) == 0);
return data;
#elif defined(__EMSCRIPTEN__)
// Pages in Wasm are always 64KiB.
return 65536;
#else
return 0;
#endif
Expand All @@ -123,6 +126,9 @@ size_t DetectTotalMiB(size_t page_bytes) {
HWY_ASSERT(sysctl(mib, sizeof(mib) / sizeof(*mib), &data, &len, nullptr, 0) ==
0);
return data >> 20;
#elif defined(__EMSCRIPTEN__)
// The maximum linear memory in Wasm is currently specified at 16GiB.
return 16384;
#else
#error "Port"
#endif
Expand Down Expand Up @@ -199,6 +205,9 @@ size_t Allocator::FreeMiB() const {
sysctlbyname("vm.page_inactive_count", &inactive, &len, nullptr, 0);
sysctlbyname("vm.page_speculative_count", &speculative, &len, nullptr, 0);
return (free + inactive + speculative) * base_page_bytes_ >> 20;
#elif defined(__EMSCRIPTEN__)
// There's no way to emulate this in emscripten so we lie.
return 16384;
#else
#error "Port"
#endif
Expand Down
4 changes: 4 additions & 0 deletions util/threading.cc
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,10 @@ NestedPools::NestedPools(const BoundedTopology& topology,
const Allocator& allocator, size_t max_threads,
Tristate pin)
: pinning_(pin) {
#ifdef __EMSCRIPTEN__
// Node runs out of memory with a large number of workers. Cap it for now.
if (max_threads == 0 || max_threads > 32) max_threads = 32;
#endif
const size_t num_clusters = topology.NumClusters();
const size_t cluster_workers_cap = DivideMaxAcross(max_threads, num_clusters);

Expand Down
Loading