[ST Edge] v43 Http Socket Timeout

I’m starting up a new thread to dive into continued socket timeouts on v43 hub firmware. I have one driver that works fine on v42 but is seeing this error on all calls for v43.

POST https://192.168.X.XX:443/api [string “socket”]:1535: timeout

This driver does not use the lua socket api directly. It always goes through cosock in one of these two methods:

local socket = require 'cosock.socket' -- used for udp socket in discovery (WORKS)
local http = cosock.asyncify 'socket.http' -- used for http REST calls (TIMEOUT)

This error exactly matches the error mentioned in this thread, but the fix mentioned here by @robert.masen hasn’t resolved it [SmartThings Edge] Issue with HTTP requests in LAN drivers - #4 by robert.masen

What else can I try? The device is the Philips Hue Hub. If anyone else can try to make a call to their hub on v43, that would be appreciated.

Do you have a link to the driver’s code that I could look at?

1 Like

Thank you for taking a look at it. The code isn’t posted anywhere, but here is the HttpClient that I use in the driver. This is an example usage:

local HttpClient = require 'httpclient'

local client = HttpClient:new({
    ip = ip,
    port = port
})

local request = client:request({
        method = 'POST',
        route = 'api',
        body = {
            devicetype = driver.NAME .. '#' .. dni,
            generateclientkey = true
        }
    })

This is the module code:

local log = require 'log'
local json = require 'st.json'
local ltn12 = require 'ltn12'
local utils = require 'st.utils'
local collection = require "collection"

local socket = require 'cosock.socket'
local cosock = require 'cosock'
-- `cosock.asyncify` works like `require` but swaps out any sockets
-- created to be cosock sockets. This is important because we
-- are running inside of a command handler.
local http = cosock.asyncify 'socket.http'

local HttpClient = {
    scheme = 'https',
    ip = nil,
    port = nil,
    headers = {}
}

function HttpClient:new(o)
    return setmetatable(o or {}, {
        __index = self,
        __tostring = self.toString
    })
end

function HttpClient.isSuccess(code)
    local ncode = tonumber(code)
    return ncode ~= nil and ncode >= 200 and ncode < 300
end

function HttpClient.isTruncated(code)
    return code ~= nil and type(code) == "string" and
        (code:match('socket.*closed') ~= nil or code:match('socket.*does not exist') ~= nil)
end

function HttpClient.isTimeout(code)
    return code == 'timeout'
end

function HttpClient.isUnreachable(code)
    return code ~= nil and type(code) == "string" and (
        code:match('HostUnreachable') ~= nil or
            code:match('could not parse host') ~= nil
        )
end

function HttpClient:isReady()
    return self.scheme ~= nil and self.ip ~= nil
end

-- @param args.method (required)
-- @param args.port (optional)
-- @param args.headers (optional)
-- @param args.route (optional)
-- @param args.body (optional)
function HttpClient:request(args)
    local host = self.ip
    local port = args.port or self.port
    if port then
        host = string.format('%s:%s', host, port)
    end
    local url = string.format('%s://%s/%s', self.scheme, host, args.route or '')

    -- overlay headers (client --> args --> content)
    local encoded = json.encode(args.body or {})
    local headers = utils.merge(utils.merge({
        ['Content-Type'] = 'application/json',
        ['Content-Length'] = encoded:len()
    }, args.headers or {}), self.headers or {})

    local result = {}

    local logRequest = function(level, method, url, code)
        level(string.format('%s %s %s', method, url, code))
    end

    local lastAttempt = 2

    for attempt = 0, lastAttempt do
        local raw = {}

        local _, code, _, status = http.request {
            method = args.method,
            url = url,
            headers = headers,
            source = ltn12.source.string(encoded),
            sink = ltn12.sink.table(raw),
            create = function()
                local sock = socket.tcp()

                -- modify receive call for prefix bug in ST tcp implementation
                -- allows code prior to a fix to work with cosock asyncify
                -- https://github.com/cosock/cosock/issues/26
                function sock:receive(pattern, prefix)
                    local data = socket.tcp.receive(self, pattern)
                    if prefix ~= nil and data ~= nil then
                        return prefix .. data
                    else
                        return data
                    end
                end

                return sock
            end
        }

        result.code = code
        result.status = status
        result.raw = raw

        if code == 200 then
            logRequest(log.debug, args.method, url, code)
            break
        elseif self.isSuccess(code) then
            logRequest(log.warn, args.method, url, code)
            break
        elseif self.isUnreachable(code) then
            logRequest(log.error, args.method, url, code)
            break
        elseif self.isTimeout(code) and attempt < lastAttempt then
            logRequest(log.warn, args.method, url, code)
            -- loop and retry
        elseif self.isTruncated(code) and attempt < lastAttempt then
            logRequest(log.warn, args.method, url, code)
            -- loop and retry after exponential backoff
            socket.sleep(math.floor(2 ^ attempt))
        else
            logRequest(log.error, args.method, url, code)
            break
        end
    end

    local response = {}
    if not self.isTruncated(result.code) and #result.raw > 0 then
        response = json.decode(table.concat(result.raw))
        if response and response.errors and #response.errors > 0 then
            collection(response.errors):each(function(_, err)
                log.error(err.description)
            end)
        end
    end

    return {
        code = result.code,
        status = result.status,
        response = response
    }
end

if require then
    return HttpClient
end

Wow, thank you for taking the time to provide that and for your continued efforts to get these bugs ironed out.

We have a bug fix in the pipeline to make this all easier but in the mean time I am confident that we can get this working.

TLDR:
If you swap the following line your driver should work:

-- local http = cosock.asyncify 'socket.http'
local http = cosock.asyncify 'ssl.https`

Right now there is a bug in cosock.asyncify that only allows it to work on the first level of the module you are importing and there is a somewhat strange relationship between the LuaSocket and LuaSec projects. Mainly that LuaSocket’s socket.http library lazily imports the LuaSec’s ssl.https module which in turn imports socket, which means that asyncify works on any request with a url that has an http scheme but will fail with an https scheme :neutral_face:.

Please let me know if the above does or does not fix your problem

2 Likes

Thank you for confirming. I had a feeling that is what I was hitting when I saw this PR. I appreciate the quick feedback. This should be good to get things rolling again for the alpha test group. Much appreciated.

1 Like

Upon swapping this out, I see two different issues:

  1. I get a failure that I can’t modify the create call, which I am using to workaround the “prefix bug”
create function not permitted
create = function()
    local sock = socket.tcp()

    -- modify receive call for prefix bug in ST tcp implementation
    -- allows code prior to a fix to work with cosock asyncify
    -- https://github.com/cosock/cosock/issues/26
    function sock:receive(pattern, prefix)
        local data = socket.tcp.receive(self, pattern)
        if prefix ~= nil and data ~= nil then
            return prefix .. data
        else
            return data
        end
    end

    return sock
end
  1. Even after removing that create call and possibly only using it on http, I still see this error:
[string "ssl/https.lua"]:68: attempt to index a nil value (field 'st_ssl')

@robert.masen Just a note, but that error is seen on v42. I’m trying to find a solution that works on both v42 and v43. I haven’t tried this on v43 yet

Edit:

This is also confirmed to produce the same error on v43 firmware as well.

Unfortunately to be able to use https request on 0.42 the only solution is to shim the ssl.https module into your driver so it is correctly using the cosock module but not referencing the invalid property in that error message.

I can try to find something for that, but the error also occurs on v43. Any insight there? Do you think I should just bundle all of the ST lua libs into the driver so I can modify them?

Additionally, is there anything that we can use to help us with version specific patches? I could also add some logic that is specific to v42 or v43 if there is a way to figure that out. Previously, we were told that the version is not available, but I would expect something to be present in the system.

I am grepping for st_ssl in the 42.X lua libs but it doesn’t appear to be in there. The only reference is the one that is failing

ssl/https.lua:      self.sock = try(socket.st_ssl.wrap(self.sock, params))

How does that get introduced?

How does that get introduced?

This bug is in the file located in ssl/https.lua on line 68 in the lua-libs. This file is supposed to mirror the luasec implementation but the above is a bug has been in that code for quite some time.

Do you think I should just bundle all of the ST lua libs into the driver

I don’t think you need to do anything this drastic. I believe that you should be able to just include a directory called ssl and a file in that called https.lua in your driver package with the code from this file from luasec, and then calling cosock.asynicify "ssl.https" should resolve that file over the system version.

If that doesn’t work let me know that I can provide a shim

1 Like

Thanks @robert.masen . Unfortunately I am still seeing the timeout errors after trying this. It solved the error for st_ssl but the timeout persists.

Thanks for the update, sorry that this is so difficult to get fixed. I will have a work around for you tomorrow, hopefully pretty early in the day.

1 Like

I appreciate it. I’m surprised nobody else is reporting this on v43. Not sure what I could be doing that is unique here. It’s just an https LAN driver. The crossover between v43 and Edge must not be very big.

Is your workaround still in place? I am wondering if that is confusing the issue.

I think what is really needed is for ST to go balls to the wall with the cosock fixes and get them in the 43 beta. It’s no use as it is.

I am wondering if that is confusing the issue.

I do not expect that to be the case. The issues outlined here should not be impacted by adding a create method that strips the extra prefix argument to each chunk.

the cosock fixes and get them in the 43 beta

We are in the process of getting these bug fixes into 0.43

Sorry, I am still working on getting a workaround written up for you, things have been rather busy today.

I’m surprised nobody else is reporting this on v43

So far we haven’t had a lot of run time on lan drivers across the board. The ones I have seen have all used the socket.tcp interface directly instead of using the socket.http/ssl.https interfaces.

One option you might consider is using something like Luncheon which is an http parsing library with built in support for cosock. There are still some rough edges here (namely that chunk encoding isn’t handled out of the box). That library is available in drivers via require("luncheon") if you wanted to explore that.

I am still hopeful I can get you a workaround today, but at the very latest it should be tomorrow.

1 Like

Fair enough. I was just wondering if the create method somehow used the ‘wrong’ sock:receive or something and bypassed the asyncify stuff but I try not to think about it.

As an aside, is the example code on LAN Edge Device Driver Development Guide | SmartThings Developers correct? To me it looks like it is testing status when it means code.

I believe the only thing missing here is a proper asyncify. I have pulled to patched asynicify logic out of cosock, this could be provided as a module in your driver.

local m = {}
local realrequire = require

local function wrappedrequire(name)
  local subbed
  if name == "socket" then
    subbed = "cosock.socket"
  elseif name == "ssl" then
    subbed = "cosock.ssl"
  end
  return subbed and realrequire(subbed) or m.asyncify(name)
end

local fakeenv = {
  require = wrappedrequire
}
setmetatable(fakeenv, {__index = _ENV })

function m.asyncify(name)
  local err
  if name == "socket" then name = "cosock.socket" end
  if name == "ssl" then name = "cosock.ssl" end
  package.asyncify_loaded = package.asyncify_loaded or {}
  if package.asyncify_loaded[name] then
    return package.asyncify_loaded[name]
  end
  for _,searcher in ipairs(package.searchers) do
    local loader, loc = searcher(name)
    if type(loader) == "function" then
      -- figure out which upvalue is env
      local upvalueidx = 0
      repeat
        upvalueidx = upvalueidx + 1
        local uvname, _ = debug.getupvalue(loader, upvalueidx)
        if not uvname then upvalueidx = nil; break end
      until uvname == "_ENV"
      
      -- set loader env to fake env to override calls to `require`
      if upvalueidx then
        debug.setupvalue(loader, upvalueidx, fakeenv)
      end
      
      -- load module with loader
      local module = loader(name, loc)
      package.asyncify_loaded[name] = module
      return module
      -- If the last searcher returns nil we need to check the
      -- package.loaded or we may miss some std lib packages
    elseif loader == nil and package.loaded[name] then
      package.asyncify_loaded[name] = package.loaded[name]
      return package.loaded[name]
    else
      -- non-function values mean error, keep last non-nil value
      err = loader or err
    end
  end
  
  return err
end

return m.asyncify

For example if that was located in a file called cosockify.lua you could add the following to your driver.

local cosockify = require "cosockify"
local https = cosockify "ssl.https" -- this should resolve to your driver's ssl/https.lua module

https.request {}
--...